def filldb(mgf_id, mgf_metadata, mgf_title, mgf_mass, mgf_charge,
           mgf_sequence):
    """Insert one row into ``mgf.exp_spectrum``.

    A session is obtained from ``connectToDB()`` for this single statement
    and is shut down afterwards, including when the insert raises.

    Args:
        mgf_id: value for the ``id`` column.
        mgf_metadata: value for the ``allmeta`` column.
        mgf_title: value for the ``title`` column.
        mgf_mass: value for the ``pepmass`` column.
        mgf_charge: value for the ``charge`` column.
        mgf_sequence: value for the ``data`` column.

    Raises:
        Whatever ``connectToDB()`` or ``session.execute`` raises; errors are
        not swallowed here.
    """
    insert_stmt = """INSERT INTO mgf.exp_spectrum (id, allmeta, title, pepmass, charge, data) 
        VALUES (%s, %s, %s, %s ,%s, %s)"""
    values = (mgf_id, mgf_metadata, mgf_title, mgf_mass, mgf_charge,
              mgf_sequence)

    session = connectToDB()
    try:
        session.execute(insert_stmt, values)
    finally:
        # Release the session even if the insert fails.
        session.shutdown()
def readFromFastaDB():
    """Load peptide ids and masses from ``fasta.pep_spec``.

    Each result row is appended to the module-level ``fastaSpectrumIDs``
    collection as a ``(str(peptide_id), str(pep_mass))`` tuple. The session
    obtained from ``connectToDB()`` is shut down afterwards, including when
    the query or the iteration raises.
    """
    cass_session = connectToDB()
    try:
        query = "SELECT peptide_id,pep_mass FROM fasta.pep_spec"
        for row in cass_session.execute(query):
            # Plain attribute access; the column names are fixed, so there is
            # nothing to evaluate dynamically.
            fastaSpectrumIDs.append((str(row.peptide_id), str(row.pep_mass)))
    finally:
        cass_session.shutdown()
def storeScores(esid, tsid, score):
    """Insert one scored match into ``scores.psm`` (best effort).

    Args:
        esid: string form of the experimental spectrum UUID.
        tsid: string form of the theoretical spectrum UUID.
        score: value convertible with ``float()``.

    Any exception (connection failure, malformed UUID, failed insert, failed
    shutdown) is caught and reported with ``print``; nothing is raised to the
    caller. The session is shut down whether or not the insert succeeded.
    """
    insert_stmt = """INSERT INTO scores.psm (id, exp_spectrum_uid, theo_spectrum_uid, score)
            VALUES (%s, %s, %s, %s)"""
    try:
        session = connectToDB()
        try:
            session.set_keyspace('scores')
            session.execute(
                insert_stmt,
                (uuid.uuid1(), uuid.UUID('{' + esid + '}'),
                 uuid.UUID('{' + tsid + '}'), float(score)))
        finally:
            # Previously skipped whenever the statements above raised.
            session.shutdown()
    except Exception as e:
        print("error in saving score to cass: " + str(e))
def table_contents(toselect, table_name):
    """Return the contents of ``table_name`` for the selection ``toselect``.

    Args:
        toselect: a single column name, or a selection containing ``*``.
        table_name: table to read from (optionally keyspace-qualified).

    Returns:
        When ``toselect`` contains ``*``: a list of the row objects as
        returned by the driver. Otherwise: a list with ``str()`` of the named
        column for every row.

    The session obtained from ``connectToDB()`` is shut down before
    returning, including when the query or the row handling raises.
    """
    cass_session = connectToDB()
    try:
        # NOTE(review): the statement is assembled by string concatenation, so
        # both arguments must come from trusted code, never from user input.
        query = "SELECT " + toselect + " FROM " + table_name + ";"
        select_results = cass_session.execute(query)

        if "*" in toselect:
            return list(select_results)

        # Look the column up by name instead of eval()-ing caller text.
        column = toselect.strip()
        return [str(getattr(row, column)) for row in select_results]
    finally:
        cass_session.shutdown()
def run_step2():
    """Consume MGF messages from Kafka, store them and emit spectrum pairs.

    Steps, in order:
      1. Subscribe to ``topic_mgf`` on ``localhost:9092``.
      2. Truncate ``mgf.exp_spectrum``.
      3. Call ``readFromFastaDB()``.
      4. For every consumed message, dispatch on its key:
         * key contains ``__final__``: flush via ``createPairs(None, True)``
           and ``sendPairs(None, None, True)``, then exit the process with
           status 0.
         * key contains ``__init__``: print a notice and skip the message.
         * otherwise: post-process the message, store it under the part of
           the key before the first ``#``, create pairs, and send them along
           with the elapsed seconds for store + pair creation.

    Relies on the module-level names ``profile`` and ``pr`` for the optional
    profiling dump.
    """
    # auto_offset_reset='earliest' is not passed to the consumer here.
    consumer_c2 = KafkaConsumer('topic_mgf',
                                bootstrap_servers=['localhost:9092'],
                                group_id='apoorva-thesis')

    session = connectToDB()
    try:
        session.execute("""TRUNCATE table mgf.exp_spectrum  """)
    finally:
        # Release the session even if the truncate fails.
        session.shutdown()

    readFromFastaDB()

    print("Consumer is ready to listen!")
    for message in consumer_c2:
        if "__final__" in message.key:
            print("All pairs for input MGF sent!")
            createPairs(None, True)
            sendPairs(None, None, True)
            sys.exit(0)

        if "__init__" in message.key:
            print("Initializing.... send again")
            continue

        filteredMGFdata = postProcessMgf(message)
        fullMGFkey = message.key.split("#")

        # Only storing and pair creation are timed.
        preTime = dt.now()
        storeMGF(fullMGFkey[0], filteredMGFdata)
        pairsCreated = createPairs(fullMGFkey[0], False)
        postTime = dt.now()

        # Profile block 2
        if profile:
            pr.disable()
            s = cStringIO.StringIO()
            sortby = 'cumulative'
            ps = pstats.Stats(pr, stream=s).sort_stats(sortby)
            ps.print_stats()
            print(s.getvalue())
        # End of profile block 2

        # total_seconds() folds days, seconds and microseconds into one float.
        sendPairs(pairsCreated,
                  timedelta.total_seconds(postTime - preTime),
                  False)
Example #6
0
                                ,bootstrap_servers=['localhost:9092']
                                , group_id='apoorva-thesis')
    session.execute("""TRUNCATE table mgf.exp_spectrum;  """)
    session.execute("""TRUNCATE table scores.psm;  """)
    #print("Going to read from fastadb")
    #readFromFastaDB()

    print("Consumer is ready to listen!")
    for message in consumer_c2:
        if '__final__' in message.key.decode('utf-8'):
            print ("All pairs for input MGF sent!")
            createPairs(None, True,None)
            sendPairs(None, None, True)
            session.shutdown()
            #sys.exit(0)
        elif '__init__' not in message.key.decode('utf-8'):
            filteredMGFdata = postProcessMgf(message)
            fullMGFkey = message.key.decode('utf-8').split("#")
            preTime = dt.now()
            storeMGF(fullMGFkey[0], filteredMGFdata, session)
            pairsCreated = createPairs(fullMGFkey[0], False, session)
            postTime = dt.now()
            if pairsCreated is not None:
                sendPairs(pairsCreated, timedelta.total_seconds(postTime-preTime), False)  ### timedelta:  (days, seconds and microseconds)
        else:
            print ("Initializing.... send again")

# Module-level entry point: obtain a session from connectToDB() and pass it
# to run_step2. Both statements execute as soon as this module is run or imported.
session = connectToDB()
run_step2(session)

Example #7
0
# Path of the 'datafiles' directory that sits next to this source file.
vector_location = os.path.join(os.path.dirname(__file__), 'datafiles')
receivedPairs = []
# NOTE(review): presumably switches scoring to a placeholder value; no reader
# of this flag is visible in this part of the file - confirm.
flag_dummyScore = True

# internal variables
# Module-level mutable state. loadRealData declares allMatchesToCompute,
# m_lM, m_fI, m_plSeq and m_pfSeq as globals.
m_lM = []
m_fI = []
m_plSeq = []
m_pfSeq = []
vector1 = []
vector2 = []
allMatchesToCompute = []
messages = set()
batch = set()
# Session obtained from connectToDB() at module execution/import time.
cass_session = connectToDB()
tv = 0


def loadRealData(mgfid, fastaid):
    """Print a progress line naming the MGF id and FASTA id being loaded.

    Args:
        mgfid: MGF identifier, as a string (it is concatenated into the line).
        fastaid: FASTA identifier, as a string.

    The module-level buffers listed below are declared global; the visible
    body does not assign to them.
    """
    # Notes carried over for the globals declared here:
    #   m_lM    - xtandem: the M+H + error for an mspectrum;
    #             a vector of MZ values of MGF
    #   m_fI    - xtandem: the M+H - error for an mspectrum;
    #             a vector of intensities of MGF
    #   m_plSeq - xtandem: residue masses corresponding to the current
    #             sequence, converted into integers
    #   m_pfSeq - xtandem: residue masses corresponding to the current
    #             sequence in daltons
    global allMatchesToCompute, m_lM, m_fI, m_plSeq, m_pfSeq

    progress_line = "Loading " + mgfid + " ...fasta " + fastaid
    print(progress_line)
def run(id):
    """Obtain a session from ``connectToDB()`` and call ``run_step2`` with it.

    Args:
        id: passed through unchanged as the second argument of ``run_step2``.
    """
    db_session = connectToDB()
    run_step2(db_session, id)