def write_zenodo_files(args, baseoutdir): for study, dset in [v.split('/') for v in args.varvals]: print '%-10s %15s %s' % (study, dset, baseoutdir) metafos = heads.read_metadata(study) for method in args.methods: outdir = get_outdir(args, baseoutdir, varname='data', varval='zenodo/%s/%s/%s' % (study_translations.get(study, study), dset, method.replace('-default', ''))) print ' %-15s' % method write_single_zenodo_subdir(outdir, args, study, dset, method, metafos[dset])
def get_data_plots(args, baseoutdir, methods, study, dsets): metafos = heads.read_metadata(study) assert len(set([metafos[ds]['locus'] for ds in dsets ])) # make sure everybody has the same locus mfo = metafos[dsets[0]] data_outdirs = [ heads.get_datadir( study, 'processed', extra_str='gls-gen-paper-' + args.label) + '/' + ds for ds in dsets ] outdir = get_outdir( args, baseoutdir, varname='data', varval=study + '/' + '-vs-'.join(dsets) ) # for data, only the plots go here, since datascripts puts its output somewhere else if len(dsets) > 1 and len(methods) == 1: # sample vs sample glslabels = dsets title = get_dset_title([metafos[ds] for ds in dsets]) if study != 'kate-qrs': title += ' %s' % methstr(methods[0]) title_color = methods[0] legends = get_dset_legends([metafos[ds] for ds in dsets]) legend_title = methstr( methods[0] ) if study == 'kate-qrs' else None # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method pie_chart_faces = False print '%s:' % utils.color('green', methods[0]), elif len(methods) > 1 and len(dsets) == 1: # method vs method glslabels = methods title = get_dset_title([mfo]) title_color = None legends = [methstr(m) + ' only' for m in methods] legend_title = None pie_chart_faces = True print '%s:' % utils.color('green', dsets[0]), else: raise Exception('one of \'em has to be length 1: %d %d' % (len(methods), len(dsets))) print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels) make_gls_tree_plot(args, outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots', study + '-' + '-vs-'.join(dsets), glsfnames=[ get_gls_fname(ddir, meth, locus=mfo['locus'], data=True) for ddir in data_outdirs for meth in methods ], glslabels=glslabels, locus=mfo['locus'], title=title, title_color=title_color, legends=legends, legend_title=legend_title, pie_chart_faces=pie_chart_faces)
def get_data_pair_plots(args, baseoutdir, method, study, dsets):
    """Make a germline set tree plot comparing the datasets in <dsets> for a single <method>.

    The first two entries of <dsets> have to have the same locus (AssertionError otherwise).
    """
    metafos = heads.read_metadata(study)  # read the study's metadata once, rather than once for each dataset we look up
    mfo = metafos[dsets[0]]
    assert metafos[dsets[1]]['locus'] == mfo['locus']
    data_outdirs = [heads.get_datadir(study, 'processed', extra_str='gls-gen-paper-' + args.label) + '/' + ds for ds in dsets]
    outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + '-vs-'.join(dsets))  # for data, only the plots go here, since datascripts puts its output somewhere else
    make_gls_tree_plot(args, outdir + '/' + method + '/gls-gen-plots', study + '-' + '-vs-'.join(dsets),
                       glsfnames=[get_gls_fname(dout, method, locus=mfo['locus'], data=True) for dout in data_outdirs],
                       glslabels=dsets)
def get_data_plots(args, region, baseoutdir, methods, study, dsets): metafos = heads.read_metadata(study) assert len(set([metafos[ds]['locus'] for ds in dsets])) # make sure everybody has the same locus mfo = metafos[dsets[0]] data_outdirs = [heads.get_datadir(study, 'processed', extra_str=args.label) + '/' + ds for ds in dsets] outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + '-vs-'.join(dsets)) # for data, only the plots go here, since datascripts puts its output somewhere else title, title_color, legends, legend_title = None, None, None, None pie_chart_faces = False if len(dsets) > 1 and len(methods) == 1: # sample vs sample glslabels = dsets title = get_dset_title([metafos[ds] for ds in dsets]) if study != 'kate-qrs': title += ' %s' % methstr(methods[0]) title_color = methods[0] legends = get_dset_legends([metafos[ds] for ds in dsets]) legend_title = methstr(methods[0]) if study == 'kate-qrs' else None # for kate-qrs we need to put the subject _and_ the isotype in the title, so there's no room for the method print '%s:' % utils.color('green', methods[0]), elif len(methods) > 1 and len(dsets) == 1: # method vs method glslabels = methods title = get_dset_title([mfo]) title_color = None legends = [methstr(m) + ' only' for m in methods] legend_title = None pie_chart_faces = len(methods) > 2 # True print '%s:' % utils.color('green', dsets[0]), else: # single sample plot glslabels = dsets print '%s' % (' %s ' % utils.color('light_blue', 'vs')).join(glslabels) plotdir = outdir + '/' + '-vs-'.join(methods) + '/gls-gen-plots' if args.all_regions: # NOTE not actually checking this by running... 
but it's the same as the gls-gen one, so it should be ok plotdir += '/' + region param_dirs = None if args.add_gene_counts_to_tree_plots: # this returns 'None' for non-partis methods, which is ok for now, but I think I do usually have the parameter dir somewhere if I've run the annotation performance stuff param_dirs = [get_param_dir(ddir, meth) for ddir in data_outdirs for meth in methods] make_gls_tree_plot(args, region, plotdir, study + '-' + '-vs-'.join(dsets), glsfnames=[get_gls_fname(region, ddir, meth, locus=mfo['locus'], data=True) for ddir in data_outdirs for meth in methods], glslabels=glslabels, locus=mfo['locus'], title=title, title_color=title_color, legends=legends, legend_title=legend_title, pie_chart_faces=pie_chart_faces, param_dirs=param_dirs)
def get_data_plots(args, baseoutdir, method):
    """Make one germline set tree plot for <method> for each 'study/dset' entry in <args.varvals>."""
    for var in args.varvals:
        study, dset = var.split('/')
        dset_mfo = heads.read_metadata(study)[dset]
        processed_dir = heads.get_datadir(study, 'processed', extra_str='gls-gen-paper-' + args.label)
        data_outdir = processed_dir + '/' + dset
        # for data, only the plots go here, since datascripts puts its output somewhere else
        outdir = get_outdir(args, baseoutdir, varname='data', varval=study + '/' + dset)
        plotdir = outdir + '/' + method + '/gls-gen-plots'
        plotname = study + '-' + dset
        gls_fname = get_gls_fname(data_outdir, method, locus=dset_mfo['locus'], data=True)
        make_gls_tree_plot(args, plotdir, plotname, glsfnames=[gls_fname], glslabels=['data'])
# ], 'crotty-fna' : [ ['RUj15_ALN-FNA_week3_groupD', 'RUj15_L-ILN-FNA_week3_groupD', 'RUj15_R-ILN-FNA_week3_groupD'], ['RUj15_ALN-FNA_week3_groupD', 'RUj15_L-ILN-FNA_week15_groupD', 'RUj15_R-ILN-FNA_week15_groupD'], ['ROp15_R-ILN-FNA_week3_groupC', 'ROp15_L-ILN-FNA_week3_groupC', 'ROp15_ALN-FNA_week3_groupC'], ['ROp15_R-ILN-FNA_week9_groupC', 'ROp15_L-ILN-FNA_week9_groupC', 'ROp15_ALN-FNA_week9_groupC'], ['ROp15_R-ILN-FNA_week15_groupC', 'ROp15_L-ILN-FNA_week15_groupC', 'ROp15_ALN-FNA_week15_groupC'], ['ROp15_R-ILN-FNA_week21_groupC', 'ROp15_L-ILN-FNA_week21_groupC', 'ROp15_ALN-FNA_week21_groupC'], ['RJk15_L-ILN-FNA_week3_groupB', 'RJk15_R-ILN-FNA_week3_groupB', 'RJk15_ALN-FNA_week3_groupB'], ['RJk15_L-ILN-FNA_week9_groupB', 'RJk15_R-ILN-FNA_week9_groupB'], ['RJk15_L-ILN-FNA_week15_groupB', 'RJk15_R-ILN-FNA_week15_groupB', 'RJk15_ALN-FNA_week15_groupB'], ['RJk15_L-ILN-FNA_week21_groupB', 'RJk15_R-ILN-FNA_week21_groupB', 'RJk15_ALN-FNA_week21_groupB'], ] } default_varvals['data'] = ':'.join([study + '/' + heads.full_dataset(heads.read_metadata(study), dset) for study in default_varvals['data'] for dset in default_varvals['data'][study]]) for study in all_data_groups: for idp in range(len(all_data_groups[study])): all_data_groups[study][idp] = [heads.full_dataset(heads.read_metadata(study), ds) for ds in all_data_groups[study][idp]] # ---------------------------------------------------------------------------------------- parser = argparse.ArgumentParser() parser.add_argument('action', choices=['mfreq', 'nsnp', 'multi-nsnp', 'prevalence', 'n-leaves', 'weibull', 'alcluster', 'gls-gen', 'data']) parser.add_argument('--methods', default='partis') # not using <choices> 'cause it's harder since it's a list parser.add_argument('--method-vs-method', action='store_true') parser.add_argument('--sample-vs-sample', action='store_true') parser.add_argument('--v-genes', default='IGHV4-39*01') parser.add_argument('--locus', default='igh') parser.add_argument('--all-regions', 
action='store_true') # it'd be nicer to just have an arg for which region we're running on, but i need a way to keep the directory structure for single-region plots the same as before I generalized to d and j parser.add_argument('--varvals') parser.add_argument('--n-event-list', default='1000:2000:4000:8000') # NOTE modified later for multi-nsnp also NOTE not used for gen-gset parser.add_argument('--gls-gen-events', type=int, default=50000)
'RJk15_L-ILN-FNA_week3_groupB', 'RJk15_R-ILN-FNA_week3_groupB', 'RJk15_ALN-FNA_week3_groupB' ], ['RJk15_L-ILN-FNA_week9_groupB', 'RJk15_R-ILN-FNA_week9_groupB'], [ 'RJk15_L-ILN-FNA_week15_groupB', 'RJk15_R-ILN-FNA_week15_groupB', 'RJk15_ALN-FNA_week15_groupB' ], [ 'RJk15_L-ILN-FNA_week21_groupB', 'RJk15_R-ILN-FNA_week21_groupB', 'RJk15_ALN-FNA_week21_groupB' ], ] } default_varvals['data'] = ':'.join([ study + '/' + heads.full_dataset(heads.read_metadata(study), dset) for study in default_varvals['data'] for dset in default_varvals['data'][study] ]) for study in all_data_groups: for idp in range(len(all_data_groups[study])): all_data_groups[study][idp] = [ heads.full_dataset(heads.read_metadata(study), ds) for ds in all_data_groups[study][idp] ] # ---------------------------------------------------------------------------------------- parser = argparse.ArgumentParser() parser.add_argument('action', choices=[ 'mfreq', 'nsnp', 'multi-nsnp', 'prevalence', 'n-leaves', 'weibull', 'alcluster', 'gls-gen', 'data'
import colored_traceback.always
sys.path.insert(1, './python')
import utils
import glutils
sys.path.insert(1, './datascripts')
import heads

# color names, keyed by status (and, below, by kate-qrs dataset name)
scolors = {
    'ok': 'DarkSeaGreen',
    'missing': 'IndianRed',
    'spurious': 'IndianRed',
    'data': 'LightSteelBlue',
    'both': 'LightGrey',
}

# each kate-qrs dataset with one of these LN labels in its name also gets its own color
metafos = heads.read_metadata('kate-qrs')
for ds in metafos:
    if any(label in ds for label in ('LN1', 'LN2')):
        scolors[ds] = '#85ad98'  # green
    elif any(label in ds for label in ('LN4', 'LN3')):
        scolors[ds] = '#94a3d1'  # blue

# ete3 circle faces for the 'missing' and 'spurious' statuses
faces = {
    'missing': ete3.CircleFace(10, 'white'),
    'spurious': ete3.CircleFace(10, 'black'),
}


def get_cmdfos(cmdstr, workdir, outfname):
    """Return a length-one list holding a dict with the command string, work dir, and output file name."""
    cmdfo = {
        'cmd_str': cmdstr,
        'workdir': workdir,
        'outfname': outfname,
    }
    return [cmdfo]