# from __future__ import division import _config import sys, os, fnmatch, datetime, subprocess sys.path.append('/home/unix/maxwshen/') import numpy as np from collections import defaultdict from mylib import util, compbio import pandas as pd # Default params inp_dir = _config.OUT_PLACE + f'pb_d_major_subset/' NAME = util.get_fn(__file__) out_dir = _config.OUT_PLACE + NAME + '/' util.ensure_dir_exists(out_dir) exp_design = pd.read_csv(_config.DATA_DIR + f'Badran2015_SraRunTable.csv') pacbio_nms = exp_design[exp_design['Instrument'] == 'PacBio RS II']['Library Name'] pacbio_nms = sorted(pacbio_nms) params = { # 'mutations': 'VIWASDNGERIPYCDKSKL', # 'wt': 'AACVFGTEAQTSDSNENES', 'mutations': 'VIWSGDNGERIPYCDKSKL', 'wt': 'AACFRGTEAQTSDSNENES', 'wt_dots': '...................', 'sim_read_lens': [ 1, 50, 75,
# Script for building word lists (nouns/verbs) with NLTK + pattern.en helpers.
# NOTE(review): this source arrived with all newlines stripped onto one physical
# line; logical line breaks are restored here — no tokens changed.
import _config
import sys, os, fnmatch, datetime, subprocess
import nltk, pattern.en
from nltk.stem import WordNetLemmatizer
# Shared lemmatizer instance, reused across calls to pluralize().
wnl = WordNetLemmatizer()
from pattern.en import conjugate
from mylib import util

# Default params
# DEFAULT_INP_DIR = _config.DATA_DIR + 'google10k/google-10000-english-usa.txt'
DEFAULT_INP_DIR = [_config.NOUN_FN, _config.VERB_FN]
NAME = util.get_fn(__file__)

# Functions
def pluralize(word):
    """Return the plural form of *word*; if *word* already looks plural
    (i.e. lemmatizing it as a noun changes it), return it unchanged."""
    lemma = wnl.lemmatize(word, 'n')
    # NOTE(review): 'is not' compares object identity, not string equality.
    # This relies on NLTK returning the identical string object when the word
    # is already its own lemma — likely intended as 'word != lemma'; confirm.
    plural = True if word is not lemma else False
    if not plural:
        # Word is singular (it equals its own lemma): pluralize it.
        out = pattern.en.pluralize(word)
    else:
        # Word already differs from its lemma, treated as already plural.
        out = word
    return out

def make_nouns(inp_fn, out_dir):
    # Collects a set of nouns read from inp_fn.
    # NOTE(review): source truncated here — the rest of this function body is
    # not visible in this view.
    nouns = set()
    with open(inp_fn) as f: