PREPROCESSOR_SCRIPT_DIR = os.path.join(PAR_DIR, 'preprocessors')
RESOURCES_DIR = os.path.join(PAR_DIR, 'resources')

import pepInput
import shutil
import ArgLib
import DataFile
import glob
import pickle

if __name__ == '__main__':
    print 'dtadir is the directory containing the mzXML files to analyze'
    print 'peaks is a dictionary mapping {experiment_name: peaks csv}'
    print 'output is the directory to move all files to and set up the project in'
    options = ArgLib.parse(['init', 'dtadir', 'peaks', 'output'])

    print 'options.output: %s' % (options.output)
    print 'normpath(options.output): %s' % (os.path.normpath(options.output))

    # Fails with an OSError if the directory already exists
    os.makedirs(options.output)

    # Create database
    args = ['--sqlite', os.path.join(options.output, 'results.db')]
    print 'Models.py dir: %s' % (DATABASE_SCRIPT_DIR)
    DataFile.executeProcess(DATABASE_SCRIPT_DIR, 'Models.py', args)

    # Make experiment directories
    # Structure
    # /options.output
    # .../ExperimentName
    return scanFDict

if __name__ == "__main__":
    options = ArgLib.parse(
        [
            "init", "dtadir", "config", "model", "output", "columns",
            "verbose", "paircutoff", "ppmsyserror", "ppmstd", "ppmpenalty",
            "ambigpenalty", "minedge", "maxedge", "alpha", "subgraphcut",
            "symbolmap",
        ]
    )
    epStep = 0.00025
    maxEp = 0.1
    paramsDict = ArgLib.parseInitFile(options.init, options)
    with open(options.symbolmap, "r") as fin:
    try:
        for i in range(int(dataDict['Num Ambig Edges'].max())):
            outFile.write('\t'.join([str(i), str(len(np.where(dataDict['Num Ambig Edges'] == i)[0]))]) + '\n')
    except ValueError:
        outFile.write('\t'.join(['N/A', 'N/A']) + '\n')
    except KeyError:
        print 'ERROR: No data for Ambiguous Edges available'

    try:
        writeHistogram(outFile, dataDict['PScore'], '%s %s PScore' % (progName, name))
    except KeyError:
        print 'ERROR: No data for PScore available'

unitTestName = 'LADS Unit Test'

if __name__ == '__main__':
    options = ArgLib.parse(['init', 'denovoscript', 'dtadir', 'config', 'model', 'output', 'columns',
                            'verbose', 'paircutoff', 'ppmsyserror', 'ppmstd', 'ppmpenalty', 'ambigpenalty',
                            'minedge', 'maxedge', 'alpha', 'lads', 'sequest', 'mascot', 'pepnovo',
                            'database', 'mainprogname', 'progdict', 'comp', 'subgraphcut', 'symbolmap',
                            'pnovo', 'peaks', 'combined', 'srchid'])
    outBase = os.path.splitext(options.output)[0]
    paramsDict = ArgLib.parseInitFile(options.init, options)
    interpreter = 'python2.6'

    if options.denovoscript:
        DNSprog = options.denovoscript
        progDict = {unitTestName: 'LADS'}
    else:
        progDict = {}
    if options.progdict:
        print options.progdict
        progDict = eval(options.progdict)
    else:
    #print procSeq
    if cutOff == 0:
        return ''.join(procSeq), []
    else:
        ambig_edges = []
        for i, aa in enumerate(procSeq):
            if LCs[i] < cutOff:
                procSeq[i] = ambigAA
                ambig_edges += [(0, Constants.aminoacids[aa][2])]
        return ''.join(procSeq), ambig_edges

if __name__ == '__main__':
    print 'In this program, the PEAKS argument is just the location of the PEAKS output to parse. The number argument indicates the ALC cutoff used to form ambiguous edges (set to 0 to not form any ambiguous edges).'
    options = ArgLib.parse(['init', 'output', 'symbolmap', 'peaks', 'cutoff'])
    AMBIG_AA = '@'
    paramsDict = ArgLib.parseInitFile(options.init, options)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap({'PEAKS': 'PEAKS'}, symbolMap, paramsDict)
    #print seqMap

    scanInfo = DataFile.getScanInfo(options.peaks, delimiter=',')[1]
    if 'Peptide' in scanInfo[0]:
        seq_col = 'Peptide'
    else:
        seq_col = 'Sequence'
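# A minimal, self-contained sketch of the masking rule implemented above: any
# residue whose PEAKS local confidence (LC) falls below the cutoff is replaced
# by the ambiguous-AA placeholder (in the real script each masked residue also
# contributes an ambiguous edge carrying its mass). The sequence, scores, and
# cutoff below are hypothetical illustration only.
def _demo_mask_low_confidence(seq='PEPTIDE', LCs=(99, 98, 40, 97, 96, 30, 95), cutOff=50, ambigAA='@'):
    procSeq = list(seq)
    for i, lc in enumerate(LCs):
        if lc < cutOff:
            procSeq[i] = ambigAA
    return ''.join(procSeq)  # -> 'PE@TI@E'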
            'em': top_item[2][0],
            'log_em': top_item[2][1],
            'sib': item[1][0],
            'con': item[1][1],
            'occ': item[1][2],
            'top': True
        }]
    print 'Executing update'
    connection.execute(stmt, update_data)

if __name__ == '__main__':
    print 'modmaxcounts argument is the number of iterations in the initial EM over all results; maxcounts indicates the maximum number of EM iterations after reranking is completed (on the top-ranked results only). Set fracs to "all" to run over all fractions for the experiment, or supply the desired fractions to run EM over, separated by commas.'
    options = ArgLib.parse([
        'init', 'sqlitedb', 'experimentname', 'fracs', 'maxcounts',
        'modmaxcounts', 'output'
    ])
    t1 = time.time()
    paramsDict = ArgLib.parseInitFile(options.init, options)
    outBase = os.path.splitext(options.output)[0]

    engine = create_engine('sqlite:///' + options.sqlitedb, echo=True)
    conn = engine.connect()
    conn.execute("PRAGMA max_page_count = max_page;")
    conn.execute("PRAGMA temp_store = 2;")
    conn.execute("PRAGMA page_size")
    try:
        experiment_id = conn.execute(
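# The update above uses SQLAlchemy's "executemany" form: one parameterized
# UPDATE statement executed once per dict in update_data. A minimal sketch of
# how such a statement is typically built (the table and bind names below are
# hypothetical, not this project's schema):
#
#   from sqlalchemy import bindparam
#   stmt = result_table.update().\
#       where(result_table.c.id == bindparam('r_id')).\
#       values(em=bindparam('new_em'))
#   connection.execute(stmt, [{'r_id': 1, 'new_em': 0.9},
#                             {'r_id': 2, 'new_em': 0.1}])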
    try:
        engine.execute('ALTER TABLE %s ADD COLUMN %s %s' % (table_name, column_name, column_type))
    except sqlalchemy.exc.OperationalError:
        print 'CRITICAL ERROR: column %s already exists in table' % column_name

def get_tissue_experiment_map(tissue_paths):
    tissue_map = defaultdict(list)
    for experiment_dir in tissue_paths:
        db_loc = os.path.join(experiment_dir, 'results.db')
        engine = create_engine('sqlite:///' + db_loc, echo=True)
        conn = engine.connect()
        experiments = Models.fetch_all_experiments(conn)
        for experiment_id, experiment_name in experiments:
            tissue_map[os.path.basename(os.path.normpath(experiment_dir))] += [experiment_name]
        conn.close()
    return tissue_map

if __name__ == '__main__':
    print 'Creates DB from defined schema'
    options = ArgLib.parse(['sqlitedb'])
    engine = create_engine('sqlite:///' + options.sqlitedb, echo=True)
    Base.metadata.create_all(engine)
        for seq in rand_smpl:
            out_file.write(seq + '\n')
        out_file.close()
    else:
        print('maxsize g')
        out_file = open(out_file_name, 'w')
        for seq in new_seqs:
            out_file.write(seq + '\n')
        out_file.close()

if __name__ == '__main__':
    print(
        'This program will take a FASTA file from the --lads argument and output a six-frame translation of the file to output. Number refers to maximum size of sequence in resulting FASTA file. If a chromosomal region exceeds this length with no stop codons, the sequence will be chunked with a 100 aa overhang at each edge. Minimum Length of peptide in FASTA file is 5.'
    )
    options = ArgLib.parse(['lads', 'output', 'number'])
    outFile = open(options.output, 'w')
    #chunkSize = int(options.number)
    for seqName, sequence in sequenceGenerator(options.lads):
        #for frame in [1, 2, 3, -1, -2, -3]:
        for frame in [1, 2, 3]:
            #print(seqName, frame)
            transSeq = getTranslation(sequence.upper(), frame)
            #chunkAndWriteSequence(outFile, seqName, transSeq, frame, len(sequence), lineLength=60, chunkSize=2000, dontReadThrough=['X'], minPeptLength=10, overhang=50)
            #transSeqName = seqName + ('_+' if frame > 0 else '_') + str(frame)
            transSeqName = getTransSeqNameForGFY(seqName, frame)
            writeSequence(outFile, transSeqName, transSeq)
    outFile.close()
    if start is None:
        start = list(findAll(peptide, proteinSeq))
    end = [startInd + len(peptide) for startInd in start]
    proteinSeq = '-' + proteinSeq + '-'
    return [proteinSeq[start[i]] + '.' + proteinSeq[start[i]+1:end[i]+1] + '.' + proteinSeq[end[i]+1] for i in range(len(start))]

def getStartAndEndInds(proteinSeq, peptide):
    startInd = proteinSeq.index(peptide)
    return startInd, startInd + len(peptide)

def removeNoncanonicalAminoAcids(subjectSeq):
    # Map ambiguous residues to a canonical choice (Z->Q, B->N, U->C), strip X
    # and J, and collapse the I/L isobars to I
    return subjectSeq.replace('Z', 'Q').replace('B', 'N').replace('X', '').replace('U', 'C').replace('L', 'I').replace('J', '')

if __name__ == '__main__':
    print 'This program will take a tdv file of BLAST results indexed by scanF and attempt to explain the discrepancies using the unimod modification dictionary. mainprogname is a dict mapping the names of the score and peptide fields to their corresponding URIs. Fields are ScanF, Peptide, References, Ambig Edges (optional), Ref Peptide, and Num Identical'
    options = ArgLib.parse(['init', 'output', 'ppmstd', 'comp', 'unimoddict', 'mainprogname'],
                           [{'opts': ('-f', '--fasta'),
                             'attrs': {'type': 'string', 'dest': 'fasta',
                                       'help': 'Location of reference fasta containing reference proteins (same file used to generate BLAST DB).'}}])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    infoDict = eval(options.mainprogname)

    with open(options.unimoddict) as fin:
        unimodDict = pickle.load(fin)
    hashedUnimodDict = hashUnimodDict(unimodDict)

    outFile = open(options.output, 'w')
    cols = ['ScanF', 'Score', 'Peptide', 'Unmod Peptide', 'References', 'Modifications', 'DB Peptide', 'Alignment Score']
    if 'Ambig Edges' in infoDict:
        cols.insert(2, 'Ambig Edges')
    outFile.write('\t'.join(cols) + '\n')
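# A small usage sketch of the flanking-residue context notation built above
# ('previous.PEPTIDE.next', with '-' marking a protein terminus). The protein
# and peptide here are hypothetical:
def _demo_peptide_context():
    proteinSeq = 'MKWVTFISLLFLFSSAYS'
    peptide = 'TFISLL'
    start, end = getStartAndEndInds(proteinSeq, peptide)
    padded = '-' + proteinSeq + '-'
    # padded shifts every index by one, so padded[start] is the preceding residue
    return padded[start] + '.' + padded[start + 1:end + 1] + '.' + padded[end + 1]  # -> 'V.TFISLL.F'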
    for item in proc_tag_graph.values():
        scanData = {'ScanF': item['ScanF'],
                    'Alignment Score': item['Alignment Score'],
                    'Matching Tag Length': item['Matching Tag Length'],
                    'PEAKS ALC (%)': item['De Novo Score'],
                    'PEAKS Peptide': item['De Novo Peptide'],
                    'Decoy?': item['Decoy Status']}
        for context in item['Context']:
            scanData['Context'] = context[0]
            scanData['Modifications'] = context[1]
            scanData['Proteins'] = context[2]
            outFile.write('\t'.join([str(scanData[col]) for col in cols]) + '\n')
    outFile.close()

if __name__ == '__main__':
    print 'dtadir argument points to parent directory of de_novo and mzML files'
    print 'output is a string of values to append to the de novo filename in the output'
    options = ArgLib.parse(['init', 'dtadir', 'ppmstd', 'modtolerance', 'unimoddict', 'modmaxcounts', 'maxcounts', 'fmindex', 'denovo', 'model', 'config', 'output'],
                           [{'opts': ('-F', '--fraction'),
                             'attrs': {'type': 'int', 'dest': 'fraction', 'help': 'Fraction to run TAG_GRAPH on'}},
                            {'opts': ('-x', '--splittaxon'),
                             'attrs': {'dest': 'splittaxon', 'action': 'store_true', 'default': False,
                                       'help': 'Flag. For searches of metaproteomic databases, split identical context entries by taxon for accurate consideration via EM.'}}])
    fileFound = False
    outDir = os.path.join(options.dtadir, 'taggraph')
    peaksDir = os.path.join(options.dtadir, 'de_novo')
    dataDir = os.path.join(options.dtadir, 'data')
    localDtaDir = ''
    try:
        # This should throw an exception if the directory for dta files does not yet exist (it will then be created)
        '''
        Replace os.path.sep with '/' to fix Windows backslash issues. --smp
        dtaDir = glob.glob(dataDir + os.path.sep + '*f%02d'%options.fraction)[0] + os.path.sep
        '''
        localDtaDir = glob.glob(dataDir + '/' + '*f%02d' % options.fraction)[0] + '/'
        print localDtaDir + "\n"
    except IndexError:
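# Note on the glob above: each fraction's dta directory is located by its
# zero-padded fraction suffix, so --fraction 3 matches a directory such as
# <dtadir>/data/MyExperiment_f03/ (the experiment name here is hypothetical).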
    #session.commit()
    for fraction_name in fractions:
        results_files = glob.glob(results_dir + os.path.sep + '*_' + fraction_name + '_*tdv')
        print fraction_name, results_files
        if importTAGGRAPHResults(connection, experiment_name, fraction_name, results_files):
            print 'Fraction %s imported successfully' % fraction_name
        else:
            print 'ERROR: Unable to import results for fraction %s' % fraction_name
    return True

if __name__ == '__main__':
    options = ArgLib.parse(['sqlitedb', 'taggraph', 'init', 'fmindex', 'modtolerance', 'ppmstd', 'maxcounts', 'modmaxcounts', 'experimentname', 'fracs'],
                           optArgs=[{'opts': ('-Y', '--type'),
                                     'attrs': {'type': 'string', 'dest': 'type', 'default': None,
                                               'help': 'Value is either experiment or single; defines whether import is an experiment import or single sample import'}}])
    print 'If importing just a single fraction, ignore all arguments except sqlitedb, taggraph, type (set as single), experimentname, and fracs (set as fraction name)'
    print 'If type of import is experiment, set the taggraph argument to the directory where results are located and fracs to a tuple of fractions to import. TAGGRAPH parameters (init, fmindex, modtolerance, ppmstd, maxcounts, modmaxcounts) will be saved in the db for record keeping'
    engine = create_engine('sqlite:///' + options.sqlitedb, echo=True)
    #Session = sessionmaker(bind=engine)
    #session = Session()
    conn = engine.connect()
    if options.type == "single":
        importTAGGRAPHResults(conn, options.experimentname, options.fracs, eval(options.taggraph))
    elif options.type == "experiment":
            binsDict[numBins-1][1] += 1
        else:
            binsDict[bin][1] += 1
    for bin in binsDict:
        binsDict[bin][2] = binsDict[bin][0] - binsDict[bin][1]
    outFile.write('\n%s Scan Number Difference Distribution. Max Diff: %i' % (name, maxDiff) + '\n')
    outFile.write('\t'.join(['Diff Bin', 'Test Pairs', 'True Pairs', 'False Pairs']) + '\n')
    for i in range(numBins):
        outFile.write('\t'.join([str(elem) for elem in [bins[i], binsDict[i][0], binsDict[i][1], binsDict[i][2]]]) + '\n')

if __name__ == '__main__':
    print 'Model refers to svmmodel used'
    options = ArgLib.parse(['dtadir', 'combined', 'sequest', 'mascot', 'database', 'output', 'ppmstd', 'init', 'symbolmap'])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    progDict = ArgLib.getProgDict(An.searchprogs, options)
    dbDict = DataFile.getDBInfo(options.database)
    infoMap = dbDict['infoMap']
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)

    processedInfo = {}
    if options.mascot:
        else:
            return (seq, None)
    except KeyError:
        return False

def parseDBScans(fDict, prog, seqMap, dbDict):
    processedInfo = {}
    for csvfile in fDict.keys():
        MASCOTData = DataFile.getScanInfo(csvfile, dbDict[prog]['fields'], delimiter=',')
        processedInfo[fDict[csvfile]] = An.preprocessDatabaseScanInfo(MASCOTData, seqMap[fDict[csvfile]], dbDict[prog]['fieldmap'])
    return processedInfo

# The number argument refers to the minimum number of search programs that must report the same peptide for it to be included in the final output
if __name__ == '__main__':
    options = ArgLib.parse(['init', 'sequest', 'lads', 'mascot', 'output', 'database', 'symbolmap', 'number'])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    dbDict = DataFile.getDBInfo(options.database)
    progDict = ArgLib.getProgDict(An.searchprogs, options)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)

    if hasattr(options, 'number'):
        minNumScans = int(options.number)
    else:
        minNumScans = 1

    processedInfo = {}
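# Hypothetical shape of the fDict argument consumed by parseDBScans above: a
# mapping from each search-engine CSV export to the name its results are keyed
# under in processedInfo (file and run names are illustration only):
#
#   fDict = {'run1_mascot.csv': 'Run1_MASCOT', 'run2_mascot.csv': 'Run2_MASCOT'}
#   processedInfo = parseDBScans(fDict, 'MASCOT', seqMap, dbDict)
#   # processedInfo['Run1_MASCOT'] -> preprocessed scan info for that run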
    for bin in binsDict:
        binsDict[bin][2] = binsDict[bin][0] - binsDict[bin][1]
    outFile.write("\n%s Scan Number Difference Distribution. Max Diff: %i" % (name, maxDiff) + "\n")
    outFile.write("\t".join(["Diff Bin", "Test Pairs", "True Pairs", "False Pairs"]) + "\n")
    for i in range(numBins):
        outFile.write(
            "\t".join([str(elem) for elem in [bins[i], binsDict[i][0], binsDict[i][1], binsDict[i][2]]]) + "\n"
        )

if __name__ == "__main__":
    print "Model refers to svmmodel used"
    options = ArgLib.parse(
        ["dtadir", "combined", "sequest", "mascot", "database", "output", "ppmstd", "init", "symbolmap"]
    )
    paramsDict = ArgLib.parseInitFile(options.init, options)
    progDict = ArgLib.getProgDict(An.searchprogs, options)
    dbDict = DataFile.getDBInfo(options.database)
    with open(options.symbolmap, "r") as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)

    outFile = open(options.output, "w")
    print options.dtadir
    dtaList = glob.glob(options.dtadir + "/*.dta")
    scanFDict = getScanFDict(dtaList)
print "Saved %d Generators to Gens.Iterative.lh5" % Generators_Original['XYZList'].shape[0] return if __name__ == "__main__": print """\nAssigns data to generators that was not originally used in the clustering.\n Output: -- Assignments.h5: a matrix of assignments where each row is a vector corresponding to a data trajectory. The values of this vector are the cluster assignments. -- Assignments.h5.RMSD: Gives the RMSD from the assigned frame to its Generator. \n""" arglist=["projectfn", "generators", "atomindices","outdir","rmsdcutoff","assignments","assrmsd","stride"] options=ArgLib.parse(arglist) print sys.argv print options P1=Project.Project.LoadFromHDF(options.projectfn) AInd=numpy.loadtxt(options.atomindices, int) Generators=Trajectory.Trajectory.LoadTrajectoryFile(options.generators,Conf=P1.Conf) if options.assignments != "None": ass = Serializer.LoadData(options.assignments) else: ass = None if options.assrmsd != "None": assrmsd = Serializer.LoadData(options.assrmsd) else: assrmsd = None stride = int( options.stride ) run(P1, AInd, Generators,options.outdir,float(options.rmsdcutoff),ass, assrmsd,stride)
    return featureNames

def writeFeatures(featureList, rank, qid, outFile, comment=""):
    # Writes one LETOR-style line: rank label, 1-indexed index:value feature
    # pairs, then a trailing '# comment' (qid is currently unused)
    outFile.write('%i ' % (rank,) + ' '.join(['%i:%f' % (i+1, feature) for i, feature in enumerate(featureList)]) + ' # %s\n' % (comment,))

def printFeatureNames(featureNames):
    for i, feature in enumerate(featureNames):
        print '%i. %s' % (i+1, feature)

def printFeatures(featureNames, featureList):
    for i, feature in enumerate(featureNames):
        print '%i. %s: %f' % (i+1, feature, featureList[i])

if __name__ == '__main__':
    print 'This program generates LETOR format training data for the training of a discriminator. dtadir is of the format {/loc of dtadir: (loc of LADS SequenceDTAsTDV.py LOG file, loc of combined SEQUEST-MASCOT database results)}'
    options = ArgLib.parse(['init', 'dtadir', 'ppmstd', 'symbolmap', 'output', 'model', 'config'])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    pairConfigurations = paramsDict['Pair Configurations']
    ppm = float(options.ppmstd)

    dtadirInfo = eval(options.dtadir)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap({'LADS Unit Test': 'LADS'}, symbolMap, paramsDict)
    seqMap = seqMap['LADS Unit Test']

    PNet = PN.ProbNetwork(options.config, options.model)
    outFile = open(options.output, 'w')
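# A short sketch of the LETOR-style line writeFeatures emits, using
# hypothetical feature values; each feature is a 1-indexed 'index:value' pair
# and the trailing '# ...' comment typically carries the scan identifier:
def _demo_letor_line():
    from StringIO import StringIO  # Python 2, matching this codebase
    buf = StringIO()
    writeFeatures([0.52, 1.73], 2, 1, buf, comment='scan 1042')
    return buf.getvalue()  # -> '2 1:0.520000 2:1.730000 # scan 1042\n'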
        progDict[name] = prog

def getAllScanF(processedInfo):
    scanFs = np.array([], dtype=np.dtype('int'))
    for progName in processedInfo.keys():
        scanFs = np.append(scanFs, np.array(processedInfo[progName].keys(), dtype=np.dtype('int')))
    return np.unique(scanFs)

if __name__ == '__main__':
    options = ArgLib.parse([
        'init', 'lads', 'sequest', 'mascot', 'pepnovo', 'output', 'database',
        'symbolmap', 'pnovo', 'peaks', 'combined'
    ])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    progDict = ArgLib.getProgDict(DataFile.searchprogs, options)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)
    dbDict = DataFile.getDBInfo(options.database)

    processedInfo = {}
    if options.lads:
        LADSdict = eval(options.lads)
        for tdvfile in LADSdict.keys():
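# getAllScanF above unions the scan numbers reported by every search program.
# A hypothetical two-program example (record values elided):
#
#   processedInfo = {'MASCOT': {1: rec_a, 3: rec_b}, 'SEQUEST': {2: rec_c, 3: rec_d}}
#   getAllScanF(processedInfo)  # -> array([1, 2, 3])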
from Models import Experiment, Result, Fraction
from sqlalchemy import create_engine, func
from sqlalchemy.sql import select, and_
from collections import defaultdict

experiment = Experiment.__table__
fraction = Fraction.__table__
result = Result.__table__

if __name__ == '__main__':
    print 'output is the name of the file to write the report to'
    print 'experimentname is the experiment to check for integrity'
    print 'dtadir is the directory of the parent project'
    options = ArgLib.parse(['dtadir', 'experimentname', 'output'])

    sqlitedb_loc = os.path.join(options.dtadir, 'results.db')
    engine = create_engine('sqlite:///' + sqlitedb_loc, echo=True)
    conn = engine.connect()

    experiment_name = options.experimentname
    append_string = ''
    try:
        experiment_id = conn.execute(
            select([experiment.c.id]).where(experiment.c.name == experiment_name)).fetchone()[0]
    except TypeError:
        append_string = '.ERROR'
        experiment_id = 0
    experiment_dir = os.path.join(options.dtadir, experiment_name)

    # Get all the fraction numbers
    for line in modelFile:
        if line[0] == '#':
            continue
        weakLearners = line.split(' ')
        for learner in weakLearners:
            learnerTuple = learner.split(':')
            rankingModel += [(int(learnerTuple[0]), float(learnerTuple[1]), float(learnerTuple[2]))]
    return rankingModel

if __name__ == '__main__':
    print 'This program generates a results file containing raw LADS output postscored with the algorithm of choice. The discmodel is a supplied model, if necessary for the postscoring algorithm'
    options = ArgLib.parse(['init', 'ppmstd', 'dtadir', 'lads', 'sequest', 'config', 'model', 'output', 'symbolmap'],
                           optArgs=[{'opts': ('-D', '--discmodel'),
                                     'attrs': {'type': 'string', 'dest': 'discmodel',
                                               'help': 'Model used to calculate discriminant score'}},
                                    {'opts': ('-P', '--pairconfig'),
                                     'attrs': {'type': 'string', 'dest': 'pairconfig',
                                               'help': 'Name of LADS Pair Configuration'}},
                                    {'opts': ('-F', '--featurelist'),
                                     'attrs': {'type': 'string', 'dest': 'featurelist',
                                               'help': 'File containing pickled list of desired features (optional)'}}])
    parent = os.path.abspath(os.pardir)
    PNet = PN.ProbNetwork(options.config, options.model)
    paramsDict = ArgLib.parseInitFile(options.init, options)
    pairConfigurations = paramsDict['Pair Configurations']

    LADSSeqInfo = GLFD.parseSequenceDTAsLogfile(options.lads)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap({'LADS Unit Test': 'LADS'}, symbolMap, paramsDict)
    seqMap = seqMap['LADS Unit Test']

    if options.featurelist:
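# Hypothetical sketch of the model-file format parsed above: '#' lines are
# skipped, and each remaining line holds space-separated weak learners of the
# form 'featureIndex:threshold:weight'. For example, a file containing
#
#   # discriminator model
#   3:0.25:1.50 7:-1.20:0.75
#
# yields rankingModel == [(3, 0.25, 1.5), (7, -1.2, 0.75)].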
        writeSequence(outFile, seqName, fullSequence[startInd:endInd], lineLength=lineLength)

def writeSequence(outFile, seqName, sequence, lineLength=60):
    outFile.write(seqName + '\n')
    for i in range(0, len(sequence), lineLength):
        outFile.write(sequence[i:i+lineLength] + '\n')

def getTransSeqNameForGFY(seqName, frame, addStringToBase=''):
    seqNameList = seqName.split(' ')
    seqNameList[0] = seqNameList[0] + ('_+' if frame > 0 else '_') + str(frame) + addStringToBase
    return ' '.join(seqNameList) + ' frame=%i' % (frame,)

if __name__ == '__main__':
    print 'This program will take a FASTA file from the --lads argument and output a six-frame translation of the file to output. Number refers to maximum size of sequence in resulting FASTA file. If a chromosomal region exceeds this length with no stop codons, the sequence will be chunked with a 100 aa overhang at each edge. Minimum Length of peptide in FASTA file is 5.'
    options = ArgLib.parse(['lads', 'output', 'number'])
    outFile = open(options.output, 'w')
    #chunkSize = int(options.number)
    for seqName, sequence in sequenceGenerator(options.lads):
        for frame in [1, 2, 3, -1, -2, -3]:
            #for frame in [1, 2, 3]:
            #print seqName, frame
            transSeq = getTranslation(sequence.upper(), frame)
            chunkAndWriteSequence(outFile, seqName, transSeq, frame, len(sequence), lineLength=60, chunkSize=2000, dontReadThrough=['X'], minPeptLength=10, overhang=50)
            #transSeqName = seqName + ('_+' if frame > 0 else '_') + str(frame)
            #transSeqName = getTransSeqNameForGFY(seqName, frame)
            #writeSequence(outFile, transSeqName, transSeq)
    outFile.close()
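# Example of the frame-tagged FASTA header produced by getTransSeqNameForGFY
# above (the header text is hypothetical):
def _demo_trans_seq_name():
    forward = getTransSeqNameForGFY('>chr1 assembly=hg19', 2)   # -> '>chr1_+2 assembly=hg19 frame=2'
    reverse = getTransSeqNameForGFY('>chr1 assembly=hg19', -3)  # -> '>chr1_-3 assembly=hg19 frame=-3'
    return forward, reverse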
            mod_counts[mod]['Unique'].add(context)

    for mod in sorted(mod_counts, key=lambda k: -len(mod_counts[k]['Unique'])):
        outFile.write('\t'.join([str(mod), str(mod_counts[mod]['Total']), str(len(mod_counts[mod]['Unique']))]) + '\n')
    outFile.close()

if __name__ == '__main__':
    print 'modmaxcounts argument is the number of iterations in the initial EM over all results; maxcounts indicates the maximum number of EM iterations after reranking is completed (on the top-ranked results only). Set fracs to "all" to run over all fractions for the experiment, or supply the desired fractions to run EM over, separated by commas.'
    options = ArgLib.parse([
        'init', 'dtadir', 'sqlitedb', 'experimentname', 'fracs', 'model',
        'config', 'modtolerance', 'ppmstd', 'output'
    ])
    params_dict = ArgLib.parseInitFile(options.init, options)

    engine = create_engine('sqlite:///' + options.sqlitedb, echo=True)
    conn = engine.connect()
    conn.execute("PRAGMA max_page_count = max_page;")
    conn.execute("PRAGMA temp_store = 2;")
    conn.execute("PRAGMA page_size")

    out_base = os.path.splitext(options.output)[0]
    p_net = PN.ProbNetwork(options.config, options.model)
    try:
def getPeptideContext(extended_sequence, start=None, end=None):
    return extended_sequence[start - 1] + '.' + extended_sequence[start:end] + '.' + extended_sequence[end]

def getStartAndEndInds(proteinSeq, peptide):
    startInd = proteinSeq.index(peptide)
    return startInd, startInd + len(peptide)

if __name__ == '__main__':
    start_time = time.time()
    options = ArgLib.parse([
        'init', 'output', 'ppmstd', 'denovo', 'unimoddict', 'modmaxcounts',
        'maxcounts', 'fmindex', 'scans'
    ])

    # Amount of mass to add to the ends of peptides when fetching sequence context for incomplete masses; may move to command line arguments later
    end_buffer = 250
    # Minimum AA mass, used to decide how many amino acids to request from the sequence index when attempting to resolve an inexact match
    min_aa_mass = 50
    # Length of matched tags to tile across de novo and database sequences
    tag_length = 2
    # Maximum number of matches to return per scan
    max_candidates = 100

    # Set to Filtered mode
    filter_for_scans = False
    if options.scans is not None:
        filter_for_scans = True
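# Usage sketch for getPeptideContext above: start and end index into a
# sequence that has been extended by one flanking character on each side, so
# the preceding and following residues stay recoverable (values hypothetical):
#
#   getPeptideContext('KPEPTIDER', start=1, end=8)  # -> 'K.PEPTIDE.R'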
        else:
            prev.terminate()
            curr += 1
            p.start()
    for p in processes:
        p.join()
    for l in L:
        for j in l:
            outFile.write(str(j) + '\t')
        outFile.write('\n')

if __name__ == '__main__':
    options = ArgLib.parse(['init', 'dtadir', 'config', 'model', 'output', 'columns', 'verbose',
                            'paircutoff', 'ppmsyserror', 'ppmstd', 'ppmpenalty', 'ambigpenalty',
                            'minedge', 'maxedge', 'alpha', 'subgraphcut', 'symbolmap'])
    epStep = 0.00025
    maxEp = 0.1
    paramsDict = ArgLib.parseInitFile(options.init, options)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap({'LADS Unit Test': 'LADS'}, symbolMap, paramsDict)

    if options.columns:
        with open(options.columns) as fin:
            cols = pickle.load(fin)
    else:
        print 'Using default cols'
        cols = ['light scan', 'heavy scan', 'pair configuration', 'M+H', 'score', 'seq',
                'epsilon', 'ambiguous edges', 'num ambig edges']
    return processedInfo

def updateProgDict(fDict, progDict, prog):
    for name in fDict.values():
        progDict[name] = prog

def getAllScanF(processedInfo):
    scanFs = np.array([], dtype=np.dtype('int'))
    for progName in processedInfo.keys():
        scanFs = np.append(scanFs, np.array(processedInfo[progName].keys(), dtype=np.dtype('int')))
    return np.unique(scanFs)

if __name__ == '__main__':
    options = ArgLib.parse(['init', 'lads', 'sequest', 'mascot', 'pepnovo', 'output', 'database',
                            'symbolmap', 'pnovo', 'peaks', 'combined', 'srchid'])
    paramsDict = ArgLib.parseInitFile(options.init, options)
    progDict = ArgLib.getProgDict(An.searchprogs, options)
    with open(options.symbolmap, 'r') as fin:
        symbolMap = pickle.load(fin)
    seqMap = DataFile.generateSeqMap(progDict, symbolMap, paramsDict)
    dbDict = DataFile.getDBInfo(options.database)

    processedInfo = {}
    if options.lads:
        LADSdict = eval(options.lads)
        for tdvfile in LADSdict.keys():
            LADSScanInfo = DataFile.getScanInfo(tdvfile, dbDict['LADS']['fields'], delimiter='\t')