Beispiel #1
0
def test(algorithm, datasetfolder):
    """Run a motif-finding algorithm on one dataset and score the outcome.

    algorithm is a function that takes a motif length and a set of sequences
    and returns a list of (sequence, position) pairs of the best motif.

    datasetfolder is the name of the dataset to test on.  "sequences.fa",
    "motiflength.txt" and "sites.txt" are read from it, and the predicted
    motif and sites are written back into it as "predictedmotif.txt" and
    "predictedsites.txt".

    Returns PerformanceResults of the run: the elapsed time of the algorithm
    call, the score of the algorithm's selection, and the score of the known
    site locations.

    Raises AssertionError when the number of known site locations differs
    from the number of sequences.
    """

    seq = readSequences(os.path.join(datasetfolder, "sequences.fa"))
    motifLength = readMotifLength(os.path.join(datasetfolder,
                                               "motiflength.txt"))
    actualLocations = readSites(os.path.join(datasetfolder, "sites.txt"))
    # The message has to be one parenthesised expression: a string literal on
    # its own line after the assert is a separate no-op statement and never
    # becomes part of the message.
    assert len(seq) == len(actualLocations), (
        "there should be a site location for every sequence")

    # time.clock() was removed in Python 3.8.  Use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    startTime = timer()
    result = algorithm(motifLength, seq)
    elapsedTime = timer() - startTime

    runScore = M.informationInSelection(motifLength, result)
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator, which
    # would silently appear empty if the scorer iterated it more than once.
    actualScore = M.informationInSelection(motifLength,
                                           list(zip(seq, actualLocations)))

    pwfilename = os.path.join(datasetfolder, "predictedmotif.txt")
    writePositionWeightMatrix(pwfilename, motifLength, result)

    sitesfilename = os.path.join(datasetfolder, "predictedsites.txt")
    writeSites(sitesfilename, [site for (_, site) in result])

    return PerformanceResults(time=elapsedTime,
                              algorithmScore=runScore,
                              motifScore=actualScore)
Beispiel #2
0
def test(algorithm, datasetfolder):
    """Run a motif-finding algorithm on one dataset and score the outcome.

    algorithm is a function that takes a motif length and a set of sequences
    and returns a list of (sequence, position) pairs of the best motif.

    datasetfolder is the name of the dataset to test on.  "sequences.fa",
    "motiflength.txt" and "sites.txt" are read from it, and the predicted
    motif and sites are written back into it as "predictedmotif.txt" and
    "predictedsites.txt".

    Returns PerformanceResults of the run: the elapsed time of the algorithm
    call, the score of the algorithm's selection, and the score of the known
    site locations.

    Raises AssertionError when the number of known site locations differs
    from the number of sequences.
    """

    seq = readSequences(os.path.join(datasetfolder, "sequences.fa"))
    motifLength = readMotifLength(
        os.path.join(datasetfolder, "motiflength.txt"))
    actualLocations = readSites(os.path.join(datasetfolder, "sites.txt"))
    # The message has to be one parenthesised expression: a string literal on
    # its own line after the assert is a separate no-op statement and never
    # becomes part of the message.
    assert len(seq) == len(actualLocations), (
        "there should be a site location for every sequence")

    # time.clock() was removed in Python 3.8.  Use perf_counter() where it
    # exists and fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    startTime = timer()
    result = algorithm(motifLength, seq)
    elapsedTime = timer() - startTime

    runScore = M.informationInSelection(motifLength, result)
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator, which
    # would silently appear empty if the scorer iterated it more than once.
    actualScore = M.informationInSelection(motifLength,
                                           list(zip(seq, actualLocations)))

    pwfilename = os.path.join(datasetfolder, "predictedmotif.txt")
    writePositionWeightMatrix(pwfilename, motifLength, result)

    sitesfilename = os.path.join(datasetfolder, "predictedsites.txt")
    writeSites(sitesfilename, [site for (_, site) in result])

    return PerformanceResults(time=elapsedTime,
                              algorithmScore=runScore,
                              motifScore=actualScore)
Beispiel #3
0
def uniformRandomSampling(iterations, motifLength, sequences):
    """Find a motif placement by uniform random search.

    Starts from the arbitrary placement "position 0 in every sequence", then
    draws `iterations` random placements (one start position per sequence,
    taken from range(len(s) - motifLength + 1)) and keeps whichever placement
    has the highest M.informationInSelection score.

    Returns the best placement found as a list of (sequence, position)
    pairs.  When iterations is 0 the initial all-zero placement is returned
    without being scored.
    """
    # Choose an arbitrary best to start from.
    bestSelection = [(s, 0) for s in sequences]
    # Score of bestSelection.  It is cached instead of being recomputed on
    # every iteration (this assumes the scoring function is deterministic),
    # and filled in lazily so that no scoring happens when iterations is 0.
    bestScore = None

    for _ in range(iterations):
        selections = [(s, random.randrange(len(s) - motifLength + 1))
                      for s in sequences]
        score = M.informationInSelection(motifLength, selections)
        if bestScore is None:
            bestScore = M.informationInSelection(motifLength, bestSelection)
        if score > bestScore:
            bestSelection, bestScore = selections, score

    return bestSelection
Beispiel #4
0
def uniformRandomSampling(iterations, motifLength, sequences):
    """Find a motif placement by uniform random search.

    Starts from the arbitrary placement "position 0 in every sequence", then
    draws `iterations` random placements (one start position per sequence,
    taken from range(len(s) - motifLength + 1)) and keeps whichever placement
    has the highest M.informationInSelection score.

    Returns the best placement found as a list of (sequence, position)
    pairs.  When iterations is 0 the initial all-zero placement is returned
    without being scored.
    """
    # Choose an arbitrary best to start from.
    bestSelection = [(s, 0) for s in sequences]
    # Score of bestSelection.  It is cached instead of being recomputed on
    # every iteration (this assumes the scoring function is deterministic),
    # and filled in lazily so that no scoring happens when iterations is 0.
    bestScore = None

    for _ in range(iterations):
        selections = [(s, random.randrange(len(s) - motifLength + 1))
                      for s in sequences]
        score = M.informationInSelection(motifLength, selections)
        if bestScore is None:
            bestScore = M.informationInSelection(motifLength, bestSelection)
        if score > bestScore:
            bestSelection, bestScore = selections, score

    return bestSelection
Beispiel #5
0
def update_stock(stock):
    """Fetch the data for one stock and append it to stock_data_list.

    The outcome is reported on stdout: "<stock> refreshed" on success, or
    "<stock> not refreshed:" followed by the error text when anything in
    the fetch/append/report sequence raises an Exception.
    """
    try:
        fetched = Motif.get_stock_data(stock)
        stock_data_list.append(fetched)
        report = stock + ' refreshed'
        print(report)
    except Exception as err:
        # Best effort: a failure for one stock is reported, not propagated.
        report = stock + ' not refreshed:\n' + str(err)
        print(report)
Beispiel #6
0
def writePositionWeightMatrix(filename, motifLength, selections):
    """Write per-position base counts of the selected motif to filename.

    The subsequences are obtained from M.extractSelections(motifLength,
    selections).  For each of the motifLength positions, the number of
    occurrences of every base in Gen.bases is tallied.

    File layout: a ">PMOTIF\\t<motifLength>" header line, then one line per
    motif position holding the tab-separated counts in Gen.bases order, and
    finally a closing "<" with no trailing newline.
    """
    fragments = M.extractSelections(motifLength, selections)

    # One independent tally per motif position, every base starting at zero.
    tallies = [dict.fromkeys(Gen.bases, 0) for _ in range(motifLength)]

    for fragment in fragments:
        for index, base in enumerate(fragment):
            tallies[index][base] += 1

    with open(filename, 'w') as out:
        out.write(">PMOTIF\t" + str(motifLength) + "\n")
        for tally in tallies:
            cells = [str(tally[base]) for base in Gen.bases]
            out.write("\t".join(cells) + "\n")
        out.write("<")
            spec = 0.0
        print "%5.4f" % spec,
        print "PPV:",
        try:
            ppv = float(self.getProp('num.Feature.Hits.TP')) / self.getProp(
                'num.Feature.Hits')
        except ZeroDivisionError:
            ppv = 0.0
        print "%5.4f" % ppv


# -----------------------------------------------------------------------
# main()

# Build one Motif.Motif per entry of patternlist
motiflist = map(Motif.Motif, patternlist)

# Create the 'TOTAL' summary and emit its headings
globalAnalysis = SummaryAnalysis('TOTAL')
globalAnalysis.displayHeadings()

# Load the site file named by sitefilename
siteFile = SiteFile(sitefilename)

# Open the PDB database handle
pdbdbh = proteindatabankdbh.dbopen()

# For each PDB
for pdbid in pdblist:
    print >> sys.stderr, pdbid

    if motiflist:
Beispiel #8
0
 def evalWithSeq(seq):
     """Return a one-argument scorer bound to seq.

     The returned callable takes pos and forwards it, together with seq and
     the surrounding scope's motifLength, to M.infoInPositions.
     """
     def scoreAt(pos):
         return M.infoInPositions(motifLength, seq, pos)

     return scoreAt
Beispiel #9
0
 def evalPositions(pos):
     """Score pos with M.infoInPositions.

     motifLength and sequences are taken from the surrounding scope.
     """
     score = M.infoInPositions(motifLength, sequences, pos)
     return score
Beispiel #10
0
 def evalWithSeq(seq):
     """Return a one-argument scorer bound to seq.

     The returned callable takes pos and forwards it, together with seq and
     the surrounding scope's motifLength, to M.infoInPositions.
     """
     def scoreAt(pos):
         return M.infoInPositions(motifLength, seq, pos)

     return scoreAt
Beispiel #11
0
 def evalPositions(pos):
     """Score pos with M.infoInPositions.

     motifLength and sequences are taken from the surrounding scope.
     """
     score = M.infoInPositions(motifLength, sequences, pos)
     return score
Beispiel #12
0
# Train an SVM on (xtrain, y_train) and store it under results/.
print("TRAIN SVM")

Cobj = SVM(xtrain, y_train)
# NOTE(review): C is presumably the SVM regularisation constant -- confirm
# against the SVM class.
Cobj.train(C=1.)
Cobj.svm_save("results/svm_trained.pkl")

# Set up a gPOIM from the trained SVM and store it under results/.
print("COMPUTE gPOIM")
small_k = 2
Pobj = gPOIM()
Pobj.set_up(Cobj, samplesize=100, small_k=small_k)
Pobj.save("results/gPOIM.pkl")

# Plot the gPOIM into a PDF file.
view.plot_gPOIM(Pobj.gPOIM, "results/gPOIM.pdf")

# Extract a motif from the gPOIM.
print("EXTRACT MOTIF")
motif_start, motif_len = get_motif_pos_len_similar_diffPOIM(Pobj.gPOIM)
# NOTE(review): the motif length computed on the previous line is discarded
# and replaced by the fixed value [20] -- confirm this override is intended.
motif_len = [20]
Mobj = Motif()
# NOTE(review): the literal 2 equals small_k above; presumably it is meant
# to be the same value -- verify against Motif.find.
motif = Mobj.find(2,
                  motif_len,
                  Pobj.gPOIM,
                  motif_start,
                  base=['A', 'C', 'G', 'T'])
# np.savetxt writes plain text, so "results/pwm.pkl" is a text file despite
# its extension; seqLogo is then handed that same path.
np.savetxt("results/pwm.pkl", motif[0])
seqLogo("results/pwm.pkl")
Beispiel #13
0
        'https://www.motifinvesting.com/motifs#catalog=our&checked=featured&checked=hot&limit=90',
        'https://www.motifinvesting.com/motifs#catalog=community&checked=featured&checked=hot&checked=purchased&checked=exclude-etfs&limit=90&offset=0'
    ]

# Module-level list to which update_stock() appends each fetched stock record.
stock_data_list = []

def update_stock(stock):
    """Fetch the data for one stock and append it to stock_data_list.

    The outcome is reported on stdout: "<stock> refreshed" on success, or
    "<stock> not refreshed:" followed by the error text when anything in
    the fetch/append/report sequence raises an Exception.
    """
    try:
        fetched = Motif.get_stock_data(stock)
        stock_data_list.append(fetched)
        report = stock + ' refreshed'
        print(report)
    except Exception as err:
        # Best effort: a failure for one stock is reported, not propagated.
        report = stock + ' not refreshed:\n' + str(err)
        print(report)

if __name__ == '__main__':
    motifs = Motif.search_professional_motifs()
    # Seed set of tickers; the stocks of every motif found are merged in.
    stocks = {
        'LNKD', 'TWTR', 'MSFT', 'GOOG', 'AMBA', 'FB', 'AAPL', 'NFLX', 'PYPL',
    }
    for motif in motifs:
        print('motif:', motif)
        _stocks = Motif.get_stocks_from_motif(motif)
        stocks.update(_stocks)
        # Running total of distinct tickers after this motif.
        print(len(stocks))

    #style0 = xlwt.easyxf('font: name Times New Roman, color-index red, bold on', num_format_str='#,##0.00')
    #style1 = xlwt.easyxf(num_format_str='D-MMM-YY')

    #wb = xlwt.Workbook()
    #ws = wb.add_sheet('US Stock')