#
from __future__ import division
import _config
import sys, os, fnmatch, datetime, subprocess
sys.path.append('/home/unix/maxwshen/')
import numpy as np
from collections import defaultdict
from mylib import util, compbio
import pandas as pd

# Default params
inp_dir = _config.OUT_PLACE + f'pb_d_major_subset/'
NAME = util.get_fn(__file__)
out_dir = _config.OUT_PLACE + NAME + '/'
util.ensure_dir_exists(out_dir)

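# Load the SRA run table and collect the library names sequenced on the PacBio RS II.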
exp_design = pd.read_csv(_config.DATA_DIR + f'Badran2015_SraRunTable.csv')
pacbio_nms = exp_design[exp_design['Instrument'] ==
                        'PacBio RS II']['Library Name']
pacbio_nms = sorted(pacbio_nms)

params = {
    # 'mutations': 'VIWASDNGERIPYCDKSKL',
    # 'wt':        'AACVFGTEAQTSDSNENES',
    'mutations': 'VIWSGDNGERIPYCDKSKL',
    'wt': 'AACFRGTEAQTSDSNENES',
    'wt_dots': '...................',
    'sim_read_lens': [
        1,
        50,
        75,
        # (remaining read lengths omitted; the listing is truncated at this point in the source)
    ],
}

Code example #2
File: a_worddb.py  Project: maxwshen/relationships

import _config
import sys, os, fnmatch, datetime, subprocess
import nltk, pattern.en

from nltk.stem import WordNetLemmatizer
wnl = WordNetLemmatizer()
from pattern.en import conjugate

from mylib import util


# Default params
# DEFAULT_INP_DIR = _config.DATA_DIR + 'google10k/google-10000-english-usa.txt'
DEFAULT_INP_DIR = [_config.NOUN_FN, _config.VERB_FN] 
NAME = util.get_fn(__file__)

# Functions

def pluralize(word):
  # Lemmatize as a noun: if the lemma differs from the word, the word is already
  # plural and is returned unchanged; otherwise pattern.en forms the plural.
  lemma = wnl.lemmatize(word, 'n')
  plural = (word != lemma)
  if not plural:
    out = pattern.en.pluralize(word)
  else:
    out = word
  return out
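
# Illustrative usage sketch (not part of the original script; assumes the WordNet
# corpus is installed for nltk and the pattern library is importable):
#   pluralize('word')   # -> 'words'   lemma equals the word, so pattern.en pluralizes it
#   pluralize('words')  # -> 'words'   lemma is 'word', so the input is returned unchanged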

def make_nouns(inp_fn, out_dir):
  nouns = set()
  with open(inp_fn) as f: