def extract_closest(
    fname=None,
    df=None,
):
    '''
    Parse the output of 'bedtools closest' into a labelled DataFrame.

    Parameters
    ----------
    fname : path, optional
        Tab-separated, header-less file to load through
        ``pyutil.readData``. Only consulted when ``df`` is None.
    df : pandas.DataFrame, optional
        Already-loaded table. When supplied, ``fname`` is ignored.

    Returns
    -------
    pandas.DataFrame
        The first 18 columns of the input, relabelled with the first 17
        names of ``bedHeader + <'feature_'-prefixed bedHeader>`` followed
        by ``'distance'``, plus an extra ``'hit'`` column duplicating
        ``'feature_acc'``.

    Notes
    -----
    The input is expected to carry at least 18 columns.
    Assumes ``bedHeader`` contains an ``'acc'`` entry, so that the
    prefixed header provides ``'feature_acc'`` -- TODO confirm against
    the definition of ``bedHeader``.
    '''
    if df is None:
        df = pyutil.readData(fname, header=None, ext='tsv', guess_index=False)
    # df = df.dropna()

    # Column names: the query interval fields followed by the same fields
    # prefixed with 'feature_' for the matched interval.
    header = bedHeader + pyutil.paste0([['feature_'], bedHeader]).tolist()

    # Work on an explicit copy of the slice: adding the 'hit' column to a
    # bare ``iloc`` slice is chained assignment (SettingWithCopyWarning)
    # and leaves the result tied to the caller's frame.
    df = df.iloc[:, :18].copy()
    df.columns = header[:17] + ['distance']
    df['hit'] = df['feature_acc']
    return df
# Build per-sample column names from the input file names, then start
# assembling the expression matrix. Python 2 script section (bare `print`).
print fnames
import sys

# One metadata mapping per input file; the entries are used as dicts below.
vals = map(sptn.getBrachy, fnames)

# Short alias per file: basename with up to two trailing dot-separated
# suffixes removed and underscores replaced by hyphens.
bnames = map(lambda x: x.split('/')[-1].rsplit('.', 2)[0].replace('_', '-'), fnames)
for i, d in enumerate(vals):
    d['Alias'] = bnames[i]

# NOTE: `d.pop` removes RunID / SampleID / Alias from every dict in `vals`,
# so this statement must run before `nonEmptyVals` is computed.
flats = pyutil.meta2flat([[(x, d.pop(x)) for x in ['RunID', 'SampleID', 'Alias']] for d in vals])

# Remaining metadata entries, keeping only those whose value is not None.
nonEmptyVals = [{k: v for k, v in d.items() if v is not None} for d in vals]
conds = pyutil.meta2flat(nonEmptyVals)

# Presumably one label per file, joining the identifier part (`flats`) with
# the condition part (`conds`) -- verify against pyutil.paste0.
colNames = pyutil.paste0([flats, conds])
print colNames
#print flats,conds
#print bnames
#print vals
#sys.exit(0)


def callback(df):
    # Per-table hook handed to pyutil.Table2Mat below.
    # Presumably drops rows matching 'STRG' (negate=1) -- TODO confirm
    # against pyutil.filterMatch.
    df = pyutil.filterMatch(df, key='STRG', negate=1)
    return df


# NOTE(review): the argument list of this call continues beyond the portion
# of the file visible here; it is left exactly as found.
df = pyutil.Table2Mat(fnames, callback=callback, valCol='TPM',
# Extend the sample metadata table with light regime, genotype, age and
# Zeitgeber time, then write it to 'meta.csv'.
# NOTE(review): `meta`, `fnames`, `getLight`, `getGenotype` and `Rid2Age`
# are defined outside this section.
vals = map(getLight, fnames)
meta['light'] = vals

vals = map(getGenotype, fnames)
# Report the 'BdWT' genotype label as 'Bd21'.
vals = ['Bd21' if x == 'BdWT' else x for x in vals]
meta['gtype'] = vals
meta = pd.DataFrame(meta)

# Every sample of run '143R' is assigned the 'LD' light label.
meta.loc[meta['RunID'] == '143R', 'light'] = 'LD'
# meta[meta['RunI']]

# Age looked up per RunID. `Rid2Age.get` yields None for an unknown RunID,
# which would make the '%d' formatting below raise.
meta['Age_int'] = map(Rid2Age.get, meta['RunID'])
meta['Age'] = ['Wk%d' % x for x in meta['Age_int']]

#### Discussed with Mingjun on June 6th about 143R
# meta['ZTime'][[4,5]] = 'ZT0'
# Samples S5 and S6 of run '143R' are set to 'ZT0'.
# NOTE(review): this reads column 'sampleID' (lower-case 's'); confirm that
# this matches the column name actually present in `meta`.
meta.loc[(meta['RunID'] == '143R') & (meta['sampleID'].isin(['S5', 'S6'])), 'ZTime'] = 'ZT0'
# meta['ZTime'][[4,5]] = 'ZT0'

# Keep only '-' and digit characters of each ZTime label and parse as int.
vals = [int(re.sub('[^-\d]', '', x)) for x in meta['ZTime']]
# Negative values are shifted up by 24.
vals = [24 + x if x < 0 else x for x in vals]
meta['ZTime_int'] = vals
# Rebuild the ZTime label from the 'ZT' prefix and the integer value
# (presumably element-wise concatenation -- verify against pyutil.paste0).
meta['ZTime'] = pyutil.paste0([['ZT'] * len(meta), meta['ZTime_int']])

meta['fname'] = list(fnames)
meta.to_csv('meta.csv')
# Bare expression: no effect when run as a script.
meta
# sys.exit(0)
def meta2name(meta, keys=('gtype', 'light', 'Age', 'ZTime')):
    '''
    Build sample names by joining selected metadata fields with '_'.

    Parameters
    ----------
    meta : mapping or pandas.DataFrame
        Container indexable by each entry of ``keys``.
    keys : iterable of str
        Fields to combine, in order. The default is an immutable tuple so
        that it cannot be altered between calls.

    Returns
    -------
    The result of ``pyutil.paste0`` applied to the selected fields with
    '_' as separator (presumably one joined name per row -- verify
    against pyutil.paste0).
    '''
    res = pyutil.paste0([meta[k] for k in keys], '_')
    return res