def wrapper(args):
    """Command-line wrapper: read a library, run main(), write the result.

    Reads a whitespace-delimited table from ``args.i`` (or standard input
    when ``args.i`` is falsy), loads the model named by ``args.model``,
    calls ``main`` and writes the validated result to ``args.out`` (or
    standard output when ``args.out`` is falsy).

    Attributes read from ``args``: ``noiseparam``, ``nbins``, ``i``,
    ``model``, ``noisemodel``, ``start``, ``end``, ``out``.

    Raises:
        SortSeqError: if ``utils.get_column_headers`` reports any columns
            for the input table ('Library already sorted!').
    """
    # args.noiseparam is a string such as "[a,b]"; strip the brackets and
    # split on commas. When it has no .strip (e.g. it is None) fall back to
    # an empty parameter list. Only AttributeError is caught so that
    # unrelated failures (and KeyboardInterrupt) are no longer swallowed.
    try:
        npar = args.noiseparam.strip('[').strip(']').split(',')
    except AttributeError:
        npar = []
    nbins = args.nbins

    # Read the library from the given file, or from standard input.
    source = args.i if args.i else sys.stdin
    df = pd.io.parsers.read_csv(
        source, delim_whitespace=True,
        dtype={'seqs': str, 'batch': int})
    if len(utils.get_column_headers(df)) > 0:
        raise SortSeqError('Library already sorted!')

    model_df = io.load_model(args.model)
    output_df = main(
        df, model_df, args.noisemodel, npar,
        nbins, start=args.start, end=args.end)

    pd.set_option('max_colwidth', int(1e8))
    # Validate the dataframe before the output file is opened, so a
    # validation failure does not leave a truncated output file behind.
    output_df = qc.validate_dataset(output_df, fix=True)

    if args.out:
        # The context manager guarantees the file is flushed and closed.
        with open(args.out, 'w') as outloc:
            io.write(output_df, outloc)
    else:
        io.write(output_df, sys.stdout)
def wrapper(args):
    """Command-line wrapper: load inputs, call main(), write the result.

    The dataset is read from ``args.i`` (checked with
    ``io.validate_file_for_reading``) or from standard input when ``args.i``
    is falsy. The model is loaded from ``args.model``. The dataframe
    returned by ``main`` is written to ``args.out`` (checked with
    ``io.validate_file_for_writing``) or to standard output, passing
    ``args.fast`` through to ``io.write``.
    """
    # Input: explicit file if given, otherwise standard input.
    if args.i:
        source = io.validate_file_for_reading(args.i)
    else:
        source = sys.stdin
    dataset_df = io.load_dataset(source)
    model_df = io.load_model(args.model)

    result_df = main(
        dataset_df=dataset_df,
        model_df=model_df,
        left=args.left,
        right=args.right,
    )

    # Output: explicit file if given, otherwise standard output.
    if args.out:
        sink = io.validate_file_for_writing(args.out)
    else:
        sink = sys.stdout
    io.write(result_df, sink, fast=args.fast)
def wrapper(args):
    """Command-line wrapper: compute the 'info' value and print it as a table.

    Loads the dataset from ``args.dataset`` and the model from
    ``args.model`` (or from standard input when ``args.model`` is falsy),
    calls ``main`` and writes a one-row table with an ``info`` column, plus
    an ``info_err`` column when ``args.err`` is truthy, to ``args.out`` or
    to standard output.

    Attributes read from ``args``: ``dataset``, ``model``, ``start``,
    ``end``, ``err``, ``out``.
    """
    data_df = io.load_dataset(args.dataset)

    # The model comes from the path in args.model, or from standard input
    # when no model path was supplied.
    model_source = args.model if args.model else sys.stdin
    model_df = io.load_model(model_source)

    MI, Std = main(
        data_df, model_df, start=args.start,
        end=args.end, err=args.err)

    output_df = pd.DataFrame([MI], columns=['info'])
    if args.err:
        output_df = pd.concat(
            [output_df, pd.Series(Std, name='info_err')], axis=1)

    pd.set_option('max_colwidth', int(1e8))
    to_string_kwargs = dict(
        index=False, col_space=10, float_format=utils.format_string)

    if args.out:
        # Close the output file deterministically instead of leaking the
        # handle until interpreter exit.
        with open(args.out, 'w') as outloc:
            output_df.to_string(outloc, **to_string_kwargs)
    else:
        output_df.to_string(sys.stdout, **to_string_kwargs)
import sortseq_tools.qc as qc
import sortseq_tools.io as io
import os
import sortseq_tools.profile_ct as profile_ct
import pdb
from sortseq_tools import SortSeqError
import cProfile
import sortseq_tools.profile_info as profile_info
import sortseq_tools.learn_model as learn_model
import sortseq_tools.predictiveinfo as predictiveinfo
import pstats

# Load the dataset and the model that this profiling script works on.
df = io.load_dataset('input/mpra.txt')
model_df = io.load_model('input/mpra_model')

# Disabled: profiling of profile_info.main, kept for reference.
# NOTE(review): `Profile.run` below is presumably meant to be `cProfile.run`
# -- confirm before re-enabling.
#stats_fn = 'Profile_profile_info'
#stats_fn_hr = 'Profile_profile_info_hr'
#Profile.run('''profile_info.main(df,method='nsb')''',stats_fn)
# Reformat and print to a human-readable profile.
#p = pstats.Stats(stats_fn,stream=open(stats_fn_hr,'w'))
#p.strip_dirs()
#p.sort_stats('cumtime')
#p.print_stats()

# Independent copy of the loaded dataset.
# NOTE(review): presumably so profiled calls cannot modify `df` -- confirm.
df_copy = df.copy()

# Profile learn_model with lm=LS.
# NOTE(review): stats_fn is presumably the path the profiler stats are
# written to; its use is not visible in this part of the script.
stats_fn = 'profile/Profile_learn_model_LS_mpra'
import sortseq_tools.qc as qc
import sortseq_tools.io as io
import os
import sortseq_tools.profile_ct as profile_ct
import pdb
from sortseq_tools import SortSeqError
import cProfile
import sortseq_tools.profile_info as profile_info
import sortseq_tools.learn_model as learn_model
import sortseq_tools.predictiveinfo as predictiveinfo
import pstats

# Load the dataset and the model that this profiling script works on.
df = io.load_dataset('input/dms_1_formatted')
model_df = io.load_model('input/dms_1_model')

# Disabled: profiling of profile_info.main, kept for reference.
# NOTE(review): `Profile.run` below is presumably meant to be `cProfile.run`
# -- confirm before re-enabling.
#stats_fn = 'Profile_profile_info'
#stats_fn_hr = 'Profile_profile_info_hr'
#Profile.run('''profile_info.main(df,method='nsb')''',stats_fn)
# Reformat and print to a human-readable profile.
#p = pstats.Stats(stats_fn,stream=open(stats_fn_hr,'w'))
#p.strip_dirs()
#p.sort_stats('cumtime')
#p.print_stats()

# Independent copy of the loaded dataset.
# NOTE(review): presumably so profiled calls cannot modify `df` -- confirm.
df_copy = df.copy()

# Profile learn_model with lm=LS.
# NOTE(review): the stats file name ends in "_mpra" although this script
# loads the dms_1 inputs above -- looks like a copy-paste leftover; confirm
# the intended name so results do not overwrite another run's stats.
stats_fn = 'profile/Profile_learn_model_LS_mpra'
def wrapper(args):
    """Command-line wrapper for scan_model.main().

    Loads the model from ``args.model``, builds a list of
    ``(sequence, name, offset)`` contigs and passes it to ``main``. The
    contigs come either from the sequence given in ``args.seq`` or from
    FASTA records read from ``args.i`` (standard input when ``args.i`` is
    falsy). The resulting dataframe is written to ``args.out`` or to
    standard output.

    Raises:
        SortSeqError: if ``args.chunksize`` or ``args.numsites`` is not
            positive, if both ``args.i`` and ``args.seq`` are given, or if
            no contigs were read from the FASTA input.
    """
    model_df = io.load_model(args.model)
    _seqtype, modeltype = qc.get_model_type(model_df)

    # Site length: one per model row, plus one extra for 'NBR' models.
    site_len = model_df.shape[0]
    if modeltype == 'NBR':
        site_len += 1
    # Number of characters by which neighbouring chunks overlap, and that
    # are copied from the start onto the end of circular sequences.
    overlap = site_len - 1

    chunksize = args.chunksize
    if not (chunksize > 0):
        raise SortSeqError('chunksize=%d must be positive' % chunksize)
    if args.numsites <= 0:
        raise SortSeqError('numsites=%d must be positive.' % args.numsites)
    if args.i and args.seq:
        raise SortSeqError('Cannot use flags -i and -s simultaneously.')

    if args.seq:
        # Sequence supplied directly: a single contig named 'manual'.
        manual_seq = args.seq
        if args.circular:
            manual_seq += manual_seq[:overlap]
        contig_list = [(manual_seq, 'manual', 0)]
    else:
        # Sequences come from FASTA records, each split into chunks.
        contig_list = []
        if args.i:
            inloc = io.validate_file_for_reading(args.i)
        else:
            inloc = sys.stdin

        # Each full chunk spans chunksize start positions plus the overlap.
        window = chunksize + overlap
        for idx, record in enumerate(SeqIO.parse(inloc, 'fasta')):
            name = record.name or 'contig_%d' % idx
            sequence = str(record.seq)
            if args.circular:
                sequence += sequence[:overlap]

            total = len(sequence)
            offset = 0
            while offset + window < total:
                contig_list.append(
                    (sequence[offset:offset + window], name, offset))
                offset += chunksize
            # The remainder of the sequence always forms the last chunk.
            contig_list.append((sequence[offset:], name, offset))

        if not contig_list:
            raise SortSeqError('No input sequences to read.')

    # The output location is resolved before the scan is run.
    if args.out:
        outloc = io.validate_file_for_writing(args.out)
    else:
        outloc = sys.stdout
    output_df = main(
        model_df, contig_list,
        numsites=args.numsites, verbose=args.verbose)

    io.write(output_df, outloc, fast=args.fast)
import sortseq_tools.qc as qc
import sortseq_tools.io as io
import os
import sortseq_tools.profile_ct as profile_ct
import pdb
from sortseq_tools import SortSeqError
import cProfile
import sortseq_tools.profile_info as profile_info
import sortseq_tools.learn_model as learn_model
import sortseq_tools.predictiveinfo as predictiveinfo
import pstats

# Load the dataset and the model that this profiling script works on.
df = io.load_dataset('input/rnap-wt-format.txt')
model_df = io.load_model('input/rnap_model')

# Disabled: profiling of profile_info.main, kept for reference.
# NOTE(review): `Profile.run` below is presumably meant to be `cProfile.run`
# -- confirm before re-enabling.
#stats_fn = 'Profile_profile_info'
#stats_fn_hr = 'Profile_profile_info_hr'
#Profile.run('''profile_info.main(df,method='nsb')''',stats_fn)
# Reformat and print to a human-readable profile.
#p = pstats.Stats(stats_fn,stream=open(stats_fn_hr,'w'))
#p.strip_dirs()
#p.sort_stats('cumtime')
#p.print_stats()

# Independent copy of the loaded dataset.
# NOTE(review): presumably so profiled calls cannot modify `df` -- confirm.
df_copy = df.copy()

# Profile learn_model with lm=LS.
# NOTE(review): stats_fn is presumably the path the profiler stats are
# written to; its use is not visible in this part of the script.
stats_fn = 'profile/Profile_learn_model_LS'