def main():
    """Command-line entry point for the multi-condition correlation bar plot.

    Reads ``--condition_ls``, ``--correlation_ls`` and ``--savefn`` from the
    command line, echoes them through ``util.print_args`` and forwards them,
    unmodified, to ``plot_corr_bar``.
    """
    title = 'Plot correlation bar of multiple condition'
    cli = argparse.ArgumentParser(description=title)

    # Alternative defaults that were left commented out:
    # parser.add_argument('--condition_ls', type=str, default='seq+shape:seq:shape', help='Condition list')
    # parser.add_argument('--correlation_ls', type=str, default='0.78:0.305:0.343', help='Correlation list')
    # parser.add_argument('--savefn', type=str, default='/home/gongjing/project/shape_imputation/results/condition_compare_correlation.pdf', help='Path to plot file')

    # (flag, default, help) -- every option is a plain string.
    option_table = (
        ('--condition_ls',
         'HEK293(wc, vivo):HEK293(wc, vitro):HEK293(ch, vivo):HEK293(np, vivo):HEK293(cy, vivo):mES(wc, vivo)',
         'Condition list'),
        ('--correlation_ls',
         '0.78:0.702:0.704:0.761:0.723:0.675',
         'Correlation list'),
        ('--savefn',
         '/home/gongjing/project/shape_imputation/results/condition_compare_correlation.vivo_vitro_WcChNpCy.pdf',
         'Path to plot file'),
    )
    for flag, default_value, help_text in option_table:
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    # Option names coincide with plot_corr_bar's keyword names.
    plot_corr_bar(**vars(options))
def main():
    """Command-line entry point: parse options and call ``plot_m6AC``.

    ``--shape_ls`` and ``--label_ls`` are forwarded as the raw strings given
    on the command line (their defaults are colon-delimited lists).
    """
    title = 'Plot dms-seq m6AC score dist'

    # The default shape list is two files joined with ':'.
    default_shape_files = (
        '/home/gongjing/project/shape_imputation/data/RBMbase/download_20191204/RMBase_hg38_all_m6A_site.tran.e1.tx_has_shape_base_valid.bed.shape100.txt',
        '/home/gongjing/project/shape_imputation/exper/d06_DMSseq_K562_vitro_trainRandmask0.3x50_vallownull100_lossDMSloss_all/prediction.dmsseq_k562_vivo_m6A_null.txt',
    )

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--shape_ls',
                     type=str,
                     default=':'.join(default_shape_files),
                     help='List of shape file')
    cli.add_argument('--label_ls',
                     type=str,
                     default='valid:null_predict',
                     help='Label list')
    cli.add_argument('--savefn',
                     type=str,
                     default='/home/gongjing/project/shape_imputation/results/dmsseq_m6AC_valid_vs_nullpredict.pdf',
                     help='Savefn pdf')

    options = cli.parse_args()
    util.print_args(title, options)

    plot_m6AC(shape_ls=options.shape_ls,
              label_ls=options.label_ls,
              savefn=options.savefn)
def main():
    """Command-line entry point: parse options and call ``compare_AUCs``.

    The AUC file list and the label list are forwarded as the raw strings
    given on the command line (their defaults are colon-delimited).
    """
    description = 'Plot AUC for single known structure'

    # Default AUC files, joined with ':' in the order of the default labels.
    experiment_root = '/home/gongjing/project/shape_imputation/exper'
    default_auc_files = [
        experiment_root + '/b28_trainLossall_GmultiplyX_randomNperfragmentpct0.3L20x10_randomNperValidate/prediction.rfam.AUCs.txt',
        experiment_root + '/b92_trainLossall_shapeOnly_x10/prediction.rfam.AUCs.txt',
        experiment_root + '/b91_trainLossall_seqOnly_x10/prediction.rfam.AUCs.txt',
    ]

    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('--AUC_txt_ls', type=str,
                     default=':'.join(default_auc_files),
                     help='AUC file list')
    cli.add_argument('--AUC_label_ls', type=str,
                     default='seq+shape:shape:seq',
                     help='AUC label list')
    cli.add_argument('--savefn', type=str,
                     default='/home/gongjing/project/shape_imputation/results/condition_compare_AUC_rfam.pdf',
                     help='Pdf file to save plot')

    options = cli.parse_args()
    util.print_args(description, options)

    # Option names coincide with compare_AUCs's keyword names.
    compare_AUCs(**vars(options))
def main():
    """Command-line entry point: parse options and call ``plot_corr_bar``.

    Takes a condition list plus one correlation list per method
    (``--SHAPEImpute_ls`` and ``--ShaKer_ls``) and an output path; all values
    are forwarded as raw strings.
    """
    title = 'Plot correlation bar of multiple condition'
    cli = argparse.ArgumentParser(description=title)

    # (flag, default, help) -- every option is a plain string.
    for flag, default_value, help_text in (
            ('--condition_ls',
             'hek_wc_vivo:hek_wc_vitro:hek_ch_vivo:hek_np_vivo:hek_cy_vivo:mes_wc_vivo',
             'Condition list'),
            ('--SHAPEImpute_ls',
             '0.766:0.715:0.699:0.755:0.710:0.666',
             'Correlation list'),
            ('--ShaKer_ls',
             '0.274:0.256:0.226:0.228:0.242:0.264',
             'Correlation list'),
            ('--savefn',
             '/home/gongjing/project/shape_imputation/results/condition_compare_correlation.shapeimpute_vs_shaker_multiple_set.pdf',
             'Path to plot file'),
    ):
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    plot_corr_bar(condition_ls=options.condition_ls,
                  SHAPEImpute_ls=options.SHAPEImpute_ls,
                  ShaKer_ls=options.ShaKer_ls,
                  savefn=options.savefn)
def main():
    """Command-line entry point: parse options and call ``rep_compare``.

    Each replicate contributes three string options (``*_out``,
    ``*_validate``, ``*_predict``); none of them has a default, so they are
    ``None`` when omitted. ``--tx_null_pct`` is a float defaulting to 0.3.
    """
    description = 'Compare replicates correlation between true and predicted shape'
    cli = argparse.ArgumentParser(description=description)

    # String options without defaults, in display order: (flag, help).
    replicate_options = (
        ('--rep1_out', 'Rep1 shape.out'),
        ('--rep1_validate', 'Rep1 fragment'),
        ('--rep1_predict', 'Rep1 fragment prediction'),
        ('--rep2_out', 'Rep2 shape.out'),
        ('--rep2_validate', 'Rep2 fragment'),
        ('--rep2_predict', 'Rep2 fragment prediction'),
    )
    for flag, help_text in replicate_options:
        cli.add_argument(flag, type=str, help=help_text)

    cli.add_argument('--tx_null_pct', type=float, default=0.3,
                     help='Cutoff filtering fragment with null pct')
    cli.add_argument('--savefn', type=str, help='Pdf file to save plot')

    options = cli.parse_args()
    util.print_args(description, options)

    # Option names coincide with rep_compare's keyword names.
    rep_compare(**vars(options))
def main():
    """Command-line entry point: parse options and call ``split``.

    Options are the blastn result file, the fragment file and the
    validation percentage (float, default 0.2).
    """
    title = 'Split into train & validation based on blastn'
    data_root = '/home/gongjing/project/shape_imputation/data'

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--blastn',
                     type=str,
                     default=data_root + '/seq_similarity/windowLen100.sliding10.all2.outputfile_E10',
                     help='Path to blastn file')
    cli.add_argument('--txt',
                     type=str,
                     default=data_root + '/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/windowLen100.sliding10.txt',
                     help='Path to all fragment file')
    cli.add_argument('--validation_pct',
                     type=float,
                     default=0.2,
                     help='Valiation percentage')

    options = cli.parse_args()
    util.print_args(title, options)

    split(blastn=options.blastn,
          txt=options.txt,
          validation_pct=options.validation_pct)
def main():
    """Run the repetita command at every T-th test step and save the results.

    For each step ``t`` in ``range(args.num_test)`` that is a multiple of
    ``args.T``, the command built by ``util.get_repetita_args`` is run through
    ``util.call``. Non-empty output is parsed with ``util.parse_result``; the
    ``mlu`` value is kept only when its length equals ``args.T``, and ``rc``
    is kept alongside it when it is not ``None``. Everything collected is
    passed to ``util.save`` at the end.
    """
    args = util.get_args()
    util.print_args(args)

    collected_mlus, collected_rcs = [], []
    for step in range(args.num_test):
        # Only every T-th step triggers a run.
        if step % args.T != 0:
            continue

        command = util.get_repetita_args(args, step)
        print('command:', ' '.join(command))
        output = util.call(command)

        # Nothing to parse when the call produced no output.
        if not output:
            continue
        print('stdout:', output)

        mlu, rc = util.parse_result(step, output, args)
        # Results that do not cover T entries are discarded.
        if len(mlu) != args.T:
            continue
        collected_mlus.append(mlu)
        if rc is not None:
            collected_rcs.append(rc)

    util.save(collected_mlus, collected_rcs, args)
def main():
    """Command-line entry point: parse options and call ``sort``.

    Options:
        --shape1      path forwarded as ``shape1``
        --shape2      path forwarded as ``shape2``
        --value_col1  integer column index forwarded as ``value_col1``

    The help texts of ``--shape1`` and ``--shape2`` used to be swapped
    ('Path to shape2' on ``--shape1`` and vice versa); each now describes
    its own option.
    """
    parser = argparse.ArgumentParser(description='Sort shape by NULL count')
    parser.add_argument(
        '--shape1',
        type=str,
        default='/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip/CLIPDB20162_IGF2BP1_HEK293_trx.tx_has_shape_region_null_ok.fimo/fimo.new.IGF2BP1_11.txt.shape100.txt',
        help='Path to shape1')
    parser.add_argument(
        '--shape2',
        type=str,
        default='/home/gongjing/project/shape_imputation/data/CLIP/human_trx_clip/CLIPDB20162_IGF2BP1_HEK293_trx.tx_has_shape_region_null_ok.fimo/fimo.new.IGF2BP1_11.txt.shape',
        help='Path to shape2')
    parser.add_argument('--value_col1',
                        type=int,
                        default=7,
                        help='Which column index to sort')

    # get args
    args = parser.parse_args()
    util.print_args(parser.description, args)

    sort(shape1=args.shape1, shape2=args.shape2, value_col1=args.value_col1)
def main():
    """Command-line entry point: parse options and call
    ``modification_extend_1_has_shape``.

    ``--modification_bed`` is forwarded as ``bed``, ``--icshape`` as ``out``
    and ``--label`` (default ``'e1'``) as ``label``.
    """
    title = 'RNA modification site analysis'
    data_root = '/home/gongjing/project/shape_imputation/data'

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--modification_bed', type=str,
                     default=data_root + '/RBMbase/download_20191204/RMBase_hg38_all_PseudoU_site.tran.bed',
                     help='Modification bed file')
    cli.add_argument('--icshape', type=str,
                     default=data_root + '/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
                     help='icSHAPE out file')
    cli.add_argument('--label', type=str, default='e1')

    options = cli.parse_args()
    util.print_args(title, options)

    bed_file = options.modification_bed
    shape_out = options.icshape

    # modification_has_shape(bed=args.modification_bed, out=args.icshape)
    # for dms-seq, check xxACx, C=>e1
    modification_extend_1_has_shape(bed=bed_file,
                                    out=shape_out,
                                    label=options.label)
def main():
    """Command-line entry point for the common-fragment workflow.

    ``--process_type`` selects the step to run:
        generate_common_fragment  -> ``fragment_for_test(fragment_high, fragment_low)``
        corr_of_common_fragment   -> ``fragment_compare_corr(fragment_common, savefn)``

    The default value of ``--process_type`` is the literal string
    ``'generate_common_fragment|corr_of_common_fragment'``, which matches
    neither step. Previously such a run did nothing without any feedback; a
    diagnostic is now written to stderr instead. No step is run in that case
    (unchanged), and the process still finishes normally.

    The help text of ``--fragment_common`` was a copy of the
    ``--fragment_low`` text and now describes its own option.
    """
    import sys  # local import: only needed for the diagnostic below

    parser = argparse.ArgumentParser(
        description='Retain common fragments in two validation file with different depth')
    parser.add_argument('--fragment_high',
                        type=str,
                        help='Path to fragment file with depth1')
    parser.add_argument('--fragment_low',
                        type=str,
                        help='Path to fragment file with depth2')
    parser.add_argument('--fragment_common',
                        type=str,
                        help='Path to common fragment file')
    parser.add_argument('--savefn', type=str, help='Path to save plot file')
    parser.add_argument(
        '--process_type',
        type=str,
        default='generate_common_fragment|corr_of_common_fragment',
        help='process')

    # get args
    args = parser.parse_args()
    util.print_args(
        'Retain common fragments in two validation file with different depth',
        args)

    # The two values are mutually exclusive, so elif is equivalent to the
    # former pair of independent ifs.
    if args.process_type == 'generate_common_fragment':
        fragment_for_test(fragment_high=args.fragment_high,
                          fragment_low=args.fragment_low)
    elif args.process_type == 'corr_of_common_fragment':
        fragment_compare_corr(fragment_common=args.fragment_common,
                              savefn=args.savefn)
    else:
        sys.stderr.write(
            "No action taken: --process_type must be "
            "'generate_common_fragment' or 'corr_of_common_fragment' "
            "(got %r)\n" % (args.process_type,))
def extract_process_kmers(name):
    """Extract k-mers from the genomic sequence and run the first processing passes.

    Project arguments are loaded with ``util.load_args(name)``; three files
    are then produced:
        <name>/<name>_kmers.txt.gz           k-mers extracted from the genome
        <name>/<name>_kmers_shuffled.txt.gz  all extracted k-mers, shuffled
        <name>/<name>_kmers_counts.txt.gz    occurrence counts of k-mers

    The value returned by ``extract_kmers`` is stored in the project
    arguments under ``'genome'`` and persisted with ``util.save_args``.

    Args:
        name: project name, used to get project args and in all output

    Returns:
        True after all steps have run.
    """
    log = util.print_log

    def project_file(suffix):
        # Every output lives at <name>/<name>_<suffix>.
        return '%s/%s_%s' % (name, name, suffix)

    log('start extract_process_kmers()')

    # --- project arguments -------------------------------------------------
    log('load arguments...')
    args = util.load_args(name)
    util.print_args(args)
    log('done')

    # --- genome sequence ---------------------------------------------------
    log('load FASTA...')
    log('load from %s' % args['fasta'])
    fasta = load_fasta(args['fasta'])
    log('done')

    # --- k-mer extraction --------------------------------------------------
    log('extract k-mers...')
    kmers_path = project_file('kmers.txt.gz')
    # Primary PAM first, followed by the alternative PAMs.
    pam_list = [args['pam']] + args['altpam']
    log('write in file %s' % kmers_path)
    genome = extract_kmers(name=name,
                           fasta=fasta,
                           length=args['length'],
                           pams=pam_list,
                           pampos=args['pampos'],
                           filename=kmers_path,
                           chroms=args['chrom'],
                           minchrlen=args['minchrlen'],
                           processes=args['processes'])
    sys.stdout.write('genome: %s' % genome)

    log('save genome info')
    args['genome'] = genome
    util.save_args(args)

    log('calculate k-mer statistics')
    print_stats_kmers(kmers_path, gnupath=args['gnupath'])
    log('done')

    # --- shuffling ---------------------------------------------------------
    log('shuffle k-mers...')
    shuffled_path = project_file('kmers_shuffled.txt.gz')
    log('write in file %s' % shuffled_path)
    shuffle_kmers(fileinput=kmers_path,
                  fileoutput=shuffled_path,
                  gnupath=args['gnupath'])
    log('done')

    # --- counting ----------------------------------------------------------
    log('count k-mers...')
    counts_path = project_file('kmers_counts.txt.gz')
    log('write in file %s' % counts_path)
    sort_count_kmers(fileinput=kmers_path,
                     fileoutput=counts_path,
                     mincount=args['maxoffpos'],
                     gnupath=args['gnupath'])
    log('done')

    return True
def produce_bams_main(kmers_trie, name):
    """Produce BAM files with all guideRNAs and info about their off-targets.

    Meant to run after the files and the trie were generated by
    kmers.extract_process_kmers() and guides.analyze_guides().

    Files produced (per the original description of this step):
        <name>/<name>_guides.bam            sorted BAM with off-target info
        <name>/<name>_guides.bam.bai        its index
        <name>/<name>_guides_nooff.bam      guideRNAs without off-target info
                                            (produced much faster)
        <name>/<name>_guides_nooff.bam.bai  its index

    Guides are read from ``4 ** args['greateroffdist']`` files named
    ``<name>/classifiedfiles/guides/<i>.txt.gz``. The pass with off-target
    info is skipped when ``args['offdist']`` is -1.

    Args:
        kmers_trie: trie.trie object as produced by guides.analyze_guides()
        name: project name, used to get project args and in all output
    """
    log = util.print_log

    log('start produce_bam()')

    log('load arguments...')
    args = util.load_args(name)
    util.print_args(args)
    log('done')

    log('produce SAM file with guideRNAs only (no off-targets)...')
    n = args['greateroffdist']
    parts = 4 ** n
    guides_dir = '{}/classifiedfiles/guides'.format(name)
    guides_filenames = []
    for part_index in range(parts):
        guides_filenames.append('{}/{}.txt.gz'.format(guides_dir, part_index))

    def build_bam(label, offdist):
        # Both passes differ only in label and off-target distance; the
        # remaining settings are read from args at call time.
        produce_bam_custom(kmers_trie=kmers_trie,
                           name=name,
                           label=label,
                           guides_filename=guides_filenames,
                           args=args,
                           offdist=offdist,
                           maxoffcount=args['maxoffcount'],
                           processes=args['processes'],
                           n=n,
                           parts=parts)

    log('read guides from %s' % guides_dir)
    build_bam('nooff', -1)  # -1 for no off-targets
    log('done')

    # No off-target pass requested.
    if args['offdist'] == -1:
        return

    log('produce SAM file with guideRNAs'
        ' and off-target info...')
    log('read guides from %s' % guides_dir)
    build_bam('offdist%s' % args['offdist'], args['offdist'])
    log('done')
def main(t=1995):
    """Run the repetita command once for a single step and print its output.

    The step used to be hard-coded as ``t = 1995`` inside the body; it is now
    a parameter with that same default, so ``main()`` behaves exactly as
    before while other steps can be run without editing the source.

    Args:
        t: step passed to ``util.get_repetita_args`` (default 1995).
            NOTE(review): the original comment read "at every T + 1 step";
            how 1995 relates to T is not visible here -- confirm.
    """
    # get argument
    args = util.get_args()
    # print argument
    util.print_args(args)

    repetita_args = util.get_repetita_args(args, t)
    print('command:', ' '.join(repetita_args))
    stdout = util.call(repetita_args)
    print('stdout:', stdout)
def main():
    """Command-line entry point: parse options and call ``read_dir_rfam``.

    Both options are strings without defaults (``None`` when omitted).
    """
    description = 'Parse dataset of bpRNA in SPOT-RNA project'
    cli = argparse.ArgumentParser(description=description)

    # (flag, help)
    for flag, help_text in (('--d', 'Dir of bpRNA'),
                            ('--savefn', 'File to save parsed info')):
        cli.add_argument(flag, type=str, help=help_text)

    options = cli.parse_args()
    util.print_args(description, options)

    read_dir_rfam(d=options.d, savefn=options.savefn)
def main():
    """Command-line entry point: parse options and call ``plot_iteration``.

    ``--stat_ls`` and ``--label_ls`` are forwarded as raw strings (their
    defaults are comma-delimited lists), together with ``--corr_ls`` and the
    output path ``--savefn``.
    """
    title = 'Plot NULL percentage along iterations'
    prediction_root = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/test_prediction'

    # Default stat files, joined with ',' in the order of the default labels.
    default_stat_files = (
        prediction_root + '/t.out.predict/iteration.stat.txt',
        prediction_root + '/hek_wc.out.predict/iteration.stat.txt',
    )

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--stat_ls',
                     type=str,
                     default=','.join(default_stat_files),
                     help='List of stat file')
    cli.add_argument('--label_ls',
                     type=str,
                     default='test,hek_wc_vivo',
                     help='Label list')
    cli.add_argument('--corr_ls',
                     type=str,
                     default=prediction_root + '/hek_wc.out.c80.newwithNULL.nominus.predict/corr.txt',
                     help='Correlation text list')
    cli.add_argument('--savefn',
                     type=str,
                     default='/home/gongjing/project/shape_imputation/results/null_pct_interation.pdf',
                     help='Savefn pdf')

    options = cli.parse_args()
    util.print_args(title, options)

    # Option names coincide with plot_iteration's keyword names.
    plot_iteration(**vars(options))
def main():
    """Command-line entry point: parse options and call ``compare_predict``.

    The three list options have no defaults (``None`` when omitted); only
    ``--savefn`` has a default path.
    """
    title = 'Plot correlation bar of multiple condition'
    cli = argparse.ArgumentParser(description=title)

    # String options without defaults: (flag, help).
    for flag, help_text in (
            ('--validation_ls', 'Validation file list'),
            ('--predict_ls', 'Predict file list'),
            ('--label_ls', 'Lable list'),
    ):
        cli.add_argument(flag, type=str, help=help_text)

    cli.add_argument('--savefn',
                     type=str,
                     default='/home/gongjing/project/shape_imputation/results/condition_compare_correlation.track.wc_vs_cy.pdf',
                     help='Path to plot file')

    options = cli.parse_args()
    util.print_args(title, options)

    compare_predict(validation_ls=options.validation_ls,
                    predict_ls=options.predict_ls,
                    label_ls=options.label_ls,
                    savefn=options.savefn)
def main():
    """Command-line entry point: parse options and call ``plot_corr_bar``.

    ``--condition_ls`` and ``--correlation_ls`` are forwarded as raw strings
    (their defaults are colon-delimited lists) along with ``--savefn``.

    The default of ``--condition_ls`` used to end in a stray ``)``
    (``'0.1:0.2:0.3:0.4:0.5)'``), which made the last default condition
    ``'0.5)'``; the stray character has been removed.
    """
    parser = argparse.ArgumentParser(
        description='Plot correlation bar of multiple condition')
    parser.add_argument('--condition_ls',
                        type=str,
                        default='0.1:0.2:0.3:0.4:0.5',
                        help='Condition list')
    parser.add_argument('--correlation_ls',
                        type=str,
                        default='0.940:0.915:0.885:0.810:0.755',
                        help='Correlation list')
    parser.add_argument(
        '--savefn',
        type=str,
        default='/home/gongjing/project/shape_imputation/results/condition_compare_correlation.18S.pdf',
        help='Path to plot file')

    # get args
    args = parser.parse_args()
    util.print_args('Plot correlation bar of multiple condition', args)

    plot_corr_bar(condition_ls=args.condition_ls,
                  correlation_ls=args.correlation_ls,
                  savefn=args.savefn)
def main():
    """Command-line entry point: parse options and call
    ``extract_start_codon_shape``.

    ``--icshape`` is forwarded under the keyword ``shape``; ``--species`` and
    ``--savefn`` keep their names.
    """
    description = 'Extract start/stop condon shape'
    cli = argparse.ArgumentParser(description=description)

    # (flag, default, help) -- every option is a plain string.
    for flag, default_value, help_text in (
            ('--species', 'human', 'human'),
            ('--icshape',
             '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
             'icSHAPE out file'),
            ('--savefn',
             '/home/gongjing/project/shape_imputation/results/start_stop_codon/hek_wc.shape',
             'Pdf file to save plot'),
    ):
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    options = cli.parse_args()
    util.print_args(description, options)

    extract_start_codon_shape(species=options.species,
                              shape=options.icshape,
                              savefn=options.savefn)
def main():
    """Command-line entry point: parse options and call ``compare``.

    Both options are strings without defaults (``None`` when omitted).
    """
    description = 'Compare two shape column in a fragment file'
    cli = argparse.ArgumentParser(description=description)
    cli.add_argument('--fragment', type=str, help='fragment file')
    cli.add_argument('--savefn', type=str, help='Pdf file to save plot')

    options = cli.parse_args()
    util.print_args(description, options)

    # Option names coincide with compare's keyword names.
    compare(**vars(options))
def main():
    """Command-line entry point: parse options and call
    ``complete_shape_out_nullpct``.

    Every parsed option is forwarded under its own name.
    """
    title = 'Complete a input shape.out'
    cli = argparse.ArgumentParser(description=title)

    # (flag, type, default, help), in display order.
    option_table = (
        ('--icshape', str,
         '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
         'icSHAPE out file'),
        ('--species', str, 'human', 'Species'),
        ('--predict_label', str, 'wc_all_fragment', 'Predict_label'),
        ('--predict_model', str,
         'b28_trainLossall_GmultiplyX_randomNperfragmentpct0.3L20x10_randomNperValidate2',
         'Model used to predict'),
        ('--pct', float, 0.5, 'Max NULL percentage in fragment to predict'),
        ('--window_len', int, 100, 'window_len'),
        ('--sliding', int, 10, 'sliding'),
        ('--shape_null_pct', float, 0.3,
         'Stop predict when remains pct(NULL) <= cutoff'),
        # The GPU id is kept as a string.
        ('--gpu_id', str, "1", 'GPU id'),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag,
                         type=value_type,
                         default=default_value,
                         help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    # Option names coincide with the callee's keyword names.
    complete_shape_out_nullpct(**vars(options))
def main():
    """Command-line entry point: parse options and call
    ``iclip_has_shape_batch``.

    ``--icshape`` is forwarded under the keyword ``out``. ``--clip_bed`` is
    parsed and printed but not used by the batch call; it is only referenced
    by the commented-out single-file calls kept below.
    """
    title = 'CLIP data analysis'
    data_root = '/home/gongjing/project/shape_imputation/data'

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--clip_bed', type=str,
                     default=data_root + '/CLIP/human_trx_clip/STARBASE20007_DGCR8_HEK293T_trx.bed',
                     help='Bed file of CLIP data')
    cli.add_argument('--clip_bed_dir', type=str,
                     default=data_root + '/CLIP/human_trx_clip',
                     help='Dir to bed file')
    cli.add_argument('--bed_peak_len', type=int, default=10,
                     help='Min peak length to keep')
    cli.add_argument('--icshape', type=str,
                     default=data_root + '/hek_wc_vivo/3.shape/shape.c200T2M0m0.out',
                     help='icSHAPE out file')
    cli.add_argument('--max_null_pct', type=float, default=0.4,
                     help='Max percentage of NULL values in peak regions')
    cli.add_argument('--clip_table_list', type=str,
                     default=data_root + '/CLIP/human.RBP.CLIP.combined.tbl',
                     help='All CLIP table list')

    options = cli.parse_args()
    util.print_args(title, options)

    # read_bed(bed=args.clip_bed)
    # iclip_has_shape(bed=args.clip_bed, bed_peak_len=args.bed_peak_len, out=args.icshape, max_null_pct=args.max_null_pct)
    batch_kwargs = {
        'clip_table_list': options.clip_table_list,
        'clip_bed_dir': options.clip_bed_dir,
        'bed_peak_len': options.bed_peak_len,
        'out': options.icshape,
        'max_null_pct': options.max_null_pct,
    }
    iclip_has_shape_batch(**batch_kwargs)
def main():
    """Command-line entry point: parse options and call
    ``plot_tx_shape_iterations`` with the whole parsed namespace.
    """
    title = 'Plot NULL corr across iterations'
    shape_root = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape'

    cli = argparse.ArgumentParser(description=title)

    # (flag, type, default, help), in display order.
    option_table = (
        ('--icshape', str,
         shape_root + '/test_prediction/hek_wc.out.c80',
         'predict file dir'),
        ('--predict_dir', str,
         shape_root + '/test_prediction/hek_wc.out.predict',
         'predict file dir'),
        ('--validation', str,
         shape_root + '/shape.c200T2M0m0.out.windowsHasNull/validation_randomnullfragment/windowLen100.sliding100.validation.randomNperfragmentNullPct0.3.maxL20.S1234.txt',
         'validation file'),
        ('--max_iterations', int, 100, 'plot <= max_iterations'),
        ('--tx', str, 'ENST00000331434', 'shape plot of tx'),
        ('--icshape_true', str,
         shape_root + '/test_prediction/hek_wc.out.c80',
         'predict file dir'),
    )
    for flag, value_type, default_value, help_text in option_table:
        cli.add_argument(flag,
                         type=value_type,
                         default=default_value,
                         help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    # plot_dir_null_corr(args)
    # generate_new_shape_out_with_validation_null(args)
    plot_tx_shape_iterations(options)
def main():
    """Command-line entry point: parse options, seed ``random`` and call
    ``data_agumentation``.

    ``random.seed(--seed)`` is applied before the call; ``--txt``,
    ``--times`` and ``--strategy`` are forwarded under their own names.
    """
    title = 'Generate N DA data for a train set'
    cli = argparse.ArgumentParser(description=title)

    # (flag, type, default, help), in display order.
    for flag, value_type, default_value, help_text in (
            ('--txt', str,
             '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/shape.c200T2M0m0.out.windowsHasNull/train_randomnullfragment_DA/windowLen100.sliding100.train.txt',
             'Path to blastn file'),
            ('--seed', int, 1234, 'random seed'),
            ('--times', int, 20, 'DA times'),
            ('--strategy', str, 'random',
             'DA strategy: random|shadow_null_shuffle'),
    ):
        cli.add_argument(flag,
                         type=value_type,
                         default=default_value,
                         help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    # Seed the module-level generator before the augmentation runs.
    random.seed(options.seed)
    data_agumentation(txt=options.txt,
                      times=options.times,
                      strategy=options.strategy)
def main():
    """Command-line entry point: parse options and call ``search``."""
    title = 'Search motif from a fasta'
    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--species', type=str, default='human', help='Species')
    cli.add_argument('--savefn',
                     type=str,
                     default='/home/gongjing/project/shape_imputation/data/RBMbase/download_20191204/hg38_m6A_motif.bed',
                     help='Savefn')

    options = cli.parse_args()
    util.print_args(title, options)

    # Option names coincide with search's keyword names.
    search(**vars(options))
def main():
    """Command-line entry point: parse ``--txt`` and call ``convert``."""
    title = 'Set random NULL position as other signal'
    default_txt = (
        '/home/gongjing/project/shape_imputation/data/hek_wc_vivo_rRNA/3.shape/'
        'shape.c200T2M0m0.out.windowsHasNull/'
        'windowLen100.sliding100.fulllength18S.validation_randomNULL0.3.txt'
    )

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--txt',
                     type=str,
                     default=default_txt,
                     help='validation data set')

    options = cli.parse_args()
    util.print_args(title, options)

    convert(txt=options.txt)
def main():
    """Command-line entry point: parse options and call
    ``plot_shape_tx_null_pct``.

    ``--icshape1`` / ``--icshape2`` are forwarded as ``out1`` / ``out2``; the
    remaining options keep their names.
    """
    title = 'Plot null pct scatter of common tx between two icshape.out'
    shape_root = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape'
    predicted_out = shape_root + '/shape.c200T2M0m0.allfragment.0.5+exceed0.5.txt2.predict.out'

    cli = argparse.ArgumentParser(description=title)

    # (flag, default, help) -- every option is a plain string.
    for flag, default_value, help_text in (
            ('--icshape1', shape_root + '/shape.c200T2M0m0.out',
             'icSHAPE out file1'),
            ('--icshape2', predicted_out, 'icSHAPE out file2'),
            ('--out1_label', 'True', 'icSHAPE out file1 label'),
            ('--out2_label', 'Predict', 'icSHAPE out file2 label'),
            ('--savefn', predicted_out + '.scatter.pdf', 'Save plot file'),
            ('--species', 'human', 'Species'),
    ):
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    plot_shape_tx_null_pct(out1=options.icshape1,
                           out2=options.icshape2,
                           out1_label=options.out1_label,
                           out2_label=options.out2_label,
                           savefn=options.savefn,
                           species=options.species)
def main():
    """Command-line entry point: parse ``--txt`` and call
    ``null_pattern_to_bed``.
    """
    title = 'Generate a shuffle fragment based on a null pattern'
    default_txt = (
        '/data/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/'
        'shape.c200T2M0m0.out.windowsHasNull/low_depth_null/sampling/'
        'windowLen100.sliding100.validation.low_60_1234.null_pattern.txt'
    )

    cli = argparse.ArgumentParser(description=title)
    cli.add_argument('--txt', type=str, default=default_txt, help='Path to dir')

    options = cli.parse_args()
    util.print_args(title, options)

    null_pattern_to_bed(txt=options.txt)
def main():
    """Command-line entry point for the NULL / non-NULL distribution plots.

    ``--dist_type`` is tested by substring, so both analyses run when it
    contains both names:
        'null_pattern' -> ``null_count_dist``; its output path is derived
                          from ``--data`` by replacing '.txt' with '.pdf'
                          (``--savefn`` is not used for this one)
        'shape_value'  -> ``reactivity_dist``, saved to ``--savefn``
    """
    title = 'Calc NULL/nonNULL value distribution in train/validation set'
    fragment_dir = '/home/gongjing/project/shape_imputation/data/hek_wc_vivo/3.shape/c200T2'

    cli = argparse.ArgumentParser(description=title)

    # (flag, default, help) -- every option is a plain string.
    for flag, default_value, help_text in (
            ('--data', fragment_dir + '/w100s100.train_null0.1.txt',
             'Path to fragment file'),
            ('--col', '7:8', 'Columns to calc'),
            ('--savefn', fragment_dir + '/w100s100.train_null0.1.stat.pdf',
             'Path to save stat file'),
            ('--dist_type', 'null_pattern', 'Type of dist'),
    ):
        cli.add_argument(flag, type=str, default=default_value, help=help_text)

    options = cli.parse_args()
    util.print_args(title, options)

    requested = options.dist_type

    if 'null_pattern' in requested:
        derived_pdf = options.data.replace('.txt', '.pdf')
        null_count_dist(data=options.data, col=options.col, savefn=derived_pdf)

    if 'shape_value' in requested:
        reactivity_dist(data=options.data,
                        col=options.col,
                        savefn=options.savefn)
def run_model() -> None:
    """Execute model according to the configuration.

    Steps, in order: print the parameters in ``ARGS``; pick the model builder
    and reader via ``common``; create ``ARGS.SAVE_PATH``; load the train,
    validation and test datasets; train with ``train_model``; evaluate,
    predict and run the error analysis; finally call ``test_load`` on the
    saved model with the CUDA device chosen by ``is_cuda(model)``.
    """
    banner = '#' * 5
    print(banner, 'PARAMETERS', banner)
    print_args(ARGS)
    print('#' * 10, '\n\n')

    # Which model to use?
    build_fn, reader_type = common.get_modelfn_reader()
    reader = common.create_reader(reader_type)

    def make_optimiser(model: Model) -> torch.optim.Optimizer:
        # Fixed learning rate and weight decay.
        return AdamW(model.parameters(), lr=1e-3, weight_decay=1e-3)

    def load_split(data_path, pre_processed_path):
        # All three splits are read with the same reader.
        return load_data(data_path=data_path,
                         reader=reader,
                         pre_processed_path=pre_processed_path)

    # Create SAVE_FOLDER if it doesn't exist
    ARGS.SAVE_PATH.mkdir(parents=True, exist_ok=True)

    train_dataset = load_split(ARGS.TRAIN_DATA_PATH,
                               ARGS.TRAIN_PREPROCESSED_PATH)
    val_dataset = load_split(ARGS.VAL_DATA_PATH,
                             ARGS.VAL_PREPROCESSED_PATH)
    test_dataset = load_split(ARGS.TEST_DATA_PATH,
                              ARGS.TEST_PREPROCESSED_PATH)

    model = train_model(build_fn,
                        train_data=train_dataset,
                        val_data=val_dataset,
                        test_data=test_dataset,
                        save_path=ARGS.SAVE_PATH,
                        num_epochs=ARGS.NUM_EPOCHS,
                        batch_size=ARGS.BATCH_SIZE,
                        optimiser_fn=make_optimiser,
                        cuda_device=ARGS.CUDA_DEVICE,
                        sorting_keys=reader.keys)

    common.evaluate(model, reader, test_dataset)
    result = make_prediction(model, reader, verbose=False)
    common.error_analysis(model, test_dataset)

    print('Save path', ARGS.SAVE_PATH)

    # Device 0 when is_cuda(model) is truthy, -1 otherwise.
    if is_cuda(model):
        cuda_device = 0
    else:
        cuda_device = -1
    test_load(build_fn, reader, ARGS.SAVE_PATH, result, cuda_device)
def main():
    """Command-line entry point: parse options and call
    ``known_structure_compare``.

    Every parsed option is forwarded under its own name. Options without a
    default are ``None`` when omitted.
    """
    description = 'Plot AUC for single known structure'
    cli = argparse.ArgumentParser(description=description)

    # String options without defaults that precede --start: (flag, help).
    for flag, help_text in (
            ('--dot', 'Dot file for known structure'),
            ('--validate', 'Validate fragment file'),
            ('--predict', 'Predicted fragment file'),
            ('--tx', 'Transcript to plot'),
    ):
        cli.add_argument(flag, type=str, help=help_text)

    cli.add_argument('--start', type=int, default=0, metavar='N',
                     help='Dot start index')
    cli.add_argument('--savefn', type=str, help='Pdf file to save plot')
    cli.add_argument('--title', type=str, default='',
                     help='Title of the plot')

    # Base filters applied per sample; both default to all four bases.
    cli.add_argument('--predict_bases', type=str, default='ATCG',
                     help='Bases considered while calc AUC for predict sample')
    cli.add_argument('--validate_bases', type=str, default='ATCG',
                     help='Bases considered while calc AUC for validate sample')

    options = cli.parse_args()
    util.print_args(description, options)

    # Option names coincide with the callee's keyword names.
    known_structure_compare(**vars(options))