def filldb(mgf_id, mgf_metadata, mgf_title, mgf_mass, mgf_charge, mgf_sequence):
    """Insert one experimental spectrum row into mgf.exp_spectrum.

    Parameters map 1:1 onto the table columns: id, allmeta, title,
    pepmass, charge, data.  Values are bound via driver-side %s
    placeholders, never interpolated into the CQL string.
    """
    session = connectToDB()
    try:
        session.execute(
            """INSERT INTO mgf.exp_spectrum (id, allmeta, title, pepmass, charge, data) VALUES (%s, %s, %s, %s ,%s, %s)""",
            (mgf_id, mgf_metadata, mgf_title, mgf_mass, mgf_charge, mgf_sequence))
    finally:
        # Original leaked the session when execute() raised; always release it.
        session.shutdown()
def readFromFastaDB():
    """Load (peptide_id, pep_mass) pairs from fasta.pep_spec.

    Appends each pair, stringified, to the module-level
    fastaSpectrumIDs list (matching the original behavior).
    """
    cass_session = connectToDB()
    try:
        select_results = cass_session.execute(
            "SELECT peptide_id,pep_mass FROM fasta.pep_spec")
        for row in select_results:
            # Direct attribute access replaces the original
            # eval("row.peptide_id") / eval("row.pep_mass") -- identical
            # lookup with no code-execution surface.
            fastaSpectrumIDs.append((str(row.peptide_id), str(row.pep_mass)))
    finally:
        # Release the session even if the query or iteration raises.
        cass_session.shutdown()
def storeScores(esid, tsid, score):
    """Persist one peptide-spectrum-match score into scores.psm.

    esid / tsid are hex UUID strings naming the experimental and
    theoretical spectra; score is coerced to float.  Failures are
    printed and swallowed on purpose (best-effort persistence),
    matching the original behavior.
    """
    session = None
    try:
        session = connectToDB()
        session.set_keyspace('scores')
        session.execute(
            """INSERT INTO scores.psm (id, exp_spectrum_uid, theo_spectrum_uid, score) VALUES (%s, %s, %s, %s)""",
            (uuid.uuid1(),
             uuid.UUID('{' + esid + '}'),
             uuid.UUID('{' + tsid + '}'),
             float(score)))
    except Exception as e:
        print("error in saving score to cass: " + str(e))
    finally:
        # The original only shut the session down on success, leaking it
        # precisely when execute() raised; always close it if created.
        if session is not None:
            session.shutdown()
def table_contents(toselect, table_name):
    """Return column values (as strings) or whole rows from a table.

    toselect: a single column name, or "*" to collect full row objects.
    table_name: keyspace-qualified table to read.

    NOTE(review): the query is assembled by string concatenation because
    CQL cannot bind identifiers; callers must pass trusted column/table
    names only.
    """
    cass_session = connectToDB()
    tempArray = []
    try:
        query = "SELECT " + toselect + " FROM " + table_name + ";"
        select_results = cass_session.execute(query)
        if "*" not in toselect:
            for row in select_results:
                # getattr replaces the original eval("row." + toselect):
                # same attribute lookup, no arbitrary-code surface.
                tempArray.append(str(getattr(row, toselect)))
        else:
            for row in select_results:
                tempArray.append(row)
    finally:
        # Release the session even when the query raises.
        cass_session.shutdown()
    return tempArray
def run_step2():
    """Consume MGF messages from Kafka, store each spectrum, create
    spectrum pairs, and dispatch them for scoring.

    Message protocol (inferred from the visible key checks -- confirm
    against the producer): keys containing "__init__" are ignored,
    "__final__" ends the run, anything else is "<mgf_id>#..." carrying
    spectrum data.

    Fix: the original used Python 2 `print` statements and cStringIO,
    which are syntax/import errors under Python 3 (the rest of this file
    already uses py3 syntax).
    """
    import io  # local import: py3 replacement for the original cStringIO

    consumer_c2 = KafkaConsumer('topic_mgf',
                                bootstrap_servers=['localhost:9092'],
                                group_id='apoorva-thesis')  # , auto_offset_reset='earliest')

    # Start from a clean experimental-spectrum table.
    session = connectToDB()
    session.execute("""TRUNCATE table mgf.exp_spectrum """)
    session.shutdown()
    readFromFastaDB()
    print("Consumer is ready to listen!")
    for message in consumer_c2:
        # py3 kafka delivers bytes keys; decode defensively so the
        # substring checks below keep working either way.
        key = message.key
        if isinstance(key, bytes):
            key = key.decode('utf-8')
        if "__final__" in key:
            print("All pairs for input MGF sent!")
            createPairs(None, True)
            sendPairs(None, None, True)
            sys.exit(0)
        if "__init__" not in key:
            filteredMGFdata = postProcessMgf(message)
            fullMGFkey = key.split("#")
            preTime = dt.now()
            storeMGF(fullMGFkey[0], filteredMGFdata)
            pairsCreated = createPairs(fullMGFkey[0], False)
            postTime = dt.now()
            # Profile block 2
            if profile:
                pr.disable()
                s = io.StringIO()
                sortby = 'cumulative'
                ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
                ps.print_stats()
                print(s.getvalue())
            # End of profile block 2
            sendPairs(pairsCreated,
                      timedelta.total_seconds(postTime - preTime),
                      False)  ### timedelta: (days, seconds and microseconds)
        else:
            print("Initializing.... send again")
,bootstrap_servers=['localhost:9092'] , group_id='apoorva-thesis') session.execute("""TRUNCATE table mgf.exp_spectrum; """) session.execute("""TRUNCATE table scores.psm; """) #print("Going to read from fastadb") #readFromFastaDB() print("Consumer is ready to listen!") for message in consumer_c2: if '__final__' in message.key.decode('utf-8'): print ("All pairs for input MGF sent!") createPairs(None, True,None) sendPairs(None, None, True) session.shutdown() #sys.exit(0) elif '__init__' not in message.key.decode('utf-8'): filteredMGFdata = postProcessMgf(message) fullMGFkey = message.key.decode('utf-8').split("#") preTime = dt.now() storeMGF(fullMGFkey[0], filteredMGFdata, session) pairsCreated = createPairs(fullMGFkey[0], False, session) postTime = dt.now() if pairsCreated is not None: sendPairs(pairsCreated, timedelta.total_seconds(postTime-preTime), False) ### timedelta: (days, seconds and microseconds) else: print ("Initializing.... send again") session = connectToDB() run_step2(session)
# Directory of bundled data files, resolved relative to this module.
vector_location = os.path.join(os.path.dirname(__file__), 'datafiles')
receivedPairs = []
flag_dummyScore = True  # when True, scoring presumably uses dummy values -- TODO confirm

# internal variables
m_lM = []
m_fI = []
m_plSeq = []
m_pfSeq = []
vector1 = []
vector2 = []
allMatchesToCompute = []
messages = set()
batch = set()
# Module-level Cassandra session shared by the functions below.
cass_session = connectToDB()
tv = 0


def loadRealData(mgfid, fastaid):
    """Load the MGF spectrum *mgfid* and FASTA peptide *fastaid* into
    the module-level working vectors.

    NOTE(review): only the prologue of this function is visible here --
    it announces the load and declares the globals the full
    implementation mutates; the remainder appears to be elsewhere.
    """
    print("Loading " + mgfid + " ...fasta " + fastaid)
    global allMatchesToCompute
    # xtandem: m_lM = the M+H + error for an mspectrum
    global m_lM  # a vector of MZ values of MGF
    # xtandem: m_fI = the M+H - error for an mspectrum
    global m_fI  # a vector of intensities of MGF
    # xtandem: plSeq = residue masses corresponding to the current sequence, converted into integers
    global m_plSeq  #
    # xtandem: m_pfSeq = residue masses corresponding to the current sequence in daltons
    global m_pfSeq  #
def run(id):
    """Entry point: open a Cassandra session and run step 2 for *id*."""
    db_session = connectToDB()
    run_step2(db_session, id)