def test(algorithm, datasetfolder):
    """Run a motif-finding algorithm on one dataset and score its output.

    algorithm is a function that takes a motif length and a set of
    sequences and returns a list of (sequence, position) pairs for the
    best motif it found.
    datasetfolder is the name of the dataset folder to test on. It is read
    for "sequences.fa", "motiflength.txt" and "sites.txt"; the predictions
    are written back into it as "predictedmotif.txt" and
    "predictedsites.txt".

    Returns the PerformanceResults of the run: elapsed time, the score of
    the algorithm's selection, and the score of the known sites.
    """
    seq = readSequences(os.path.join(datasetfolder, "sequences.fa"))
    motifLength = readMotifLength(
        os.path.join(datasetfolder, "motiflength.txt"))
    actualLocations = readSites(os.path.join(datasetfolder, "sites.txt"))
    assert len(seq) == len(actualLocations), (
        "there should be a site location for every sequence")

    # time.clock() was removed in Python 3.8. Prefer perf_counter() and
    # only fall back to clock() on interpreters that predate it.
    timer = getattr(time, "perf_counter", None) or time.clock
    startTime = timer()
    result = algorithm(motifLength, seq)
    elapsedTime = timer() - startTime

    runScore = M.informationInSelection(motifLength, result)
    # Materialise the pairs: on Python 3 zip() is a one-shot iterator,
    # which would be empty if the scorer walks its argument twice.
    actualScore = M.informationInSelection(
        motifLength, list(zip(seq, actualLocations)))

    pwfilename = os.path.join(datasetfolder, "predictedmotif.txt")
    writePositionWeightMatrix(pwfilename, motifLength, result)
    sitesfilename = os.path.join(datasetfolder, "predictedsites.txt")
    writeSites(sitesfilename, [site for (_, site) in result])

    return PerformanceResults(time=elapsedTime,
                              algorithmScore=runScore,
                              motifScore=actualScore)
def test(algorithm, datasetfolder):
    """Run a motif-finding algorithm on one dataset and score its output.

    algorithm is a function that takes a motif length and a set of
    sequences and returns a list of (sequence, position) pairs for the
    best motif it found.
    datasetfolder is the name of the dataset folder to test on. It is read
    for "sequences.fa", "motiflength.txt" and "sites.txt"; the predictions
    are written back into it as "predictedmotif.txt" and
    "predictedsites.txt".

    Returns the PerformanceResults of the run: elapsed time, the score of
    the algorithm's selection, and the score of the known sites.
    """
    seq = readSequences(os.path.join(datasetfolder, "sequences.fa"))
    motifLength = readMotifLength(
        os.path.join(datasetfolder, "motiflength.txt"))
    actualLocations = readSites(os.path.join(datasetfolder, "sites.txt"))
    assert len(seq) == len(actualLocations), (
        "there should be a site location for every sequence")

    # time.clock() no longer exists on Python 3.8+. Use perf_counter()
    # when available and keep clock() only as a legacy fallback.
    timer = getattr(time, "perf_counter", None) or time.clock
    startTime = timer()
    result = algorithm(motifLength, seq)
    endTime = timer()
    elapsedTime = endTime - startTime

    runScore = M.informationInSelection(motifLength, result)
    # zip() is lazy on Python 3; hand the scorer a real list so it can be
    # iterated more than once.
    knownSelection = list(zip(seq, actualLocations))
    actualScore = M.informationInSelection(motifLength, knownSelection)

    pwfilename = os.path.join(datasetfolder, "predictedmotif.txt")
    writePositionWeightMatrix(pwfilename, motifLength, result)
    sitesfilename = os.path.join(datasetfolder, "predictedsites.txt")
    writeSites(sitesfilename, [site for (_, site) in result])

    return PerformanceResults(time=elapsedTime,
                              algorithmScore=runScore,
                              motifScore=actualScore)
def uniformRandomSampling(iterations, motifLength, sequences):
    """Finds motifs by selecting a random placement, and checking the fitness.

    On each of `iterations` rounds a start position is drawn uniformly for
    every sequence, and the placement is kept if it scores strictly higher
    under M.informationInSelection than the best one seen so far.

    Returns a list of (sequence, start position) pairs, one per sequence.
    With zero iterations this is the initial placement at position 0.
    """
    # choose an arbitrary best to start
    bestSelection = [(s, 0) for s in sequences]
    # Score of bestSelection, cached so it is not recomputed every round.
    # Filled in lazily so that zero iterations performs no scoring at all.
    bestScore = None

    for _ in range(iterations):
        selections = [(s, random.randrange(len(s) - motifLength + 1))
                      for s in sequences]
        score = M.informationInSelection(motifLength, selections)
        if bestScore is None:
            bestScore = M.informationInSelection(motifLength, bestSelection)
        if score > bestScore:
            bestSelection = selections
            bestScore = score

    return bestSelection
def update_stock(stock):
    """Fetch data for one stock symbol and record it in stock_data_list.

    Best effort: any exception raised while fetching, recording or
    reporting is caught and printed instead of propagating.
    """
    try:
        stock_data_list.append(Motif.get_stock_data(stock))
        success_message = stock + ' refreshed'
        print(success_message)
    except Exception as err:
        failure_message = stock + ' not refreshed:\n' + str(err)
        print(failure_message)
def writePositionWeightMatrix(filename, motifLength, selections):
    """Write per-position base counts for the selected motif to filename.

    The subsequences come from M.extractSelections(motifLength, selections).
    The file starts with a ">PMOTIF<TAB><motifLength>" header, followed by
    one tab-separated row of counts per motif position (columns in
    Gen.bases order), and ends with a "<" that has no trailing newline.
    """
    fragments = M.extractSelections(motifLength, selections)

    # One independent tally per motif position, every base starting at zero.
    tallies = []
    for _ in range(motifLength):
        tallies.append(dict.fromkeys(Gen.bases, 0))

    for fragment in fragments:
        for column, base in enumerate(fragment):
            tallies[column][base] += 1

    with open(filename, 'w') as out:
        header = ">PMOTIF\t" + str(motifLength) + "\n"
        out.write(header)
        for tally in tallies:
            row = "\t".join(str(tally[base]) for base in Gen.bases)
            out.write(row + "\n")
        out.write("<")
spec = 0.0 print "%5.4f" % spec, print "PPV:", try: ppv = float(self.getProp('num.Feature.Hits.TP')) / self.getProp( 'num.Feature.Hits') except ZeroDivisionError: ppv = 0.0 print "%5.4f" % ppv # ----------------------------------------------------------------------- # main() # Generate Motif object from patterns motiflist = map(lambda x: Motif.Motif(x), patternlist) globalAnalysis = SummaryAnalysis('TOTAL') globalAnalysis.displayHeadings() # Load Sitefile siteFile = SiteFile(sitefilename) # Open PDB dbh pdbdbh = proteindatabankdbh.dbopen() # For each PDB for pdbid in pdblist: print >> sys.stderr, pdbid if motiflist:
def evalWithSeq(seq):
    """Return a scorer for positions with the sequence argument fixed.

    The returned callable takes `pos` and evaluates
    M.infoInPositions(motifLength, seq, pos).
    """
    def scorePositions(pos):
        return M.infoInPositions(motifLength, seq, pos)

    return scorePositions
def evalPositions(pos):
    """Score `pos` via M.infoInPositions using the enclosing
    motifLength and sequences."""
    information = M.infoInPositions(motifLength, sequences, pos)
    return information
# TRAIN SVM print("TRAIN SVM") Cobj = SVM(xtrain, y_train) Cobj.train(C=1.) Cobj.svm_save("results/svm_trained.pkl") # COMPUTE gPOIM print("COMPUTE gPOIM") small_k = 2 Pobj = gPOIM() Pobj.set_up(Cobj, samplesize=100, small_k=small_k) Pobj.save("results/gPOIM.pkl") # PLOT gPOIM view.plot_gPOIM(Pobj.gPOIM, "results/gPOIM.pdf") # EXTRACT Motif print("EXTRACT MOTIF") motif_start, motif_len = get_motif_pos_len_similar_diffPOIM(Pobj.gPOIM) motif_len = [20] Mobj = Motif() motif = Mobj.find(2, motif_len, Pobj.gPOIM, motif_start, base=['A', 'C', 'G', 'T']) np.savetxt("results/pwm.pkl", motif[0]) seqLogo("results/pwm.pkl")
'https://www.motifinvesting.com/motifs#catalog=our&checked=featured&checked=hot&limit=90', 'https://www.motifinvesting.com/motifs#catalog=community&checked=featured&checked=hot&checked=purchased&checked=exclude-etfs&limit=90&offset=0' ] stock_data_list = [] def update_stock(stock): try: stock_data = Motif.get_stock_data(stock) stock_data_list.append(stock_data) print(stock + ' refreshed') except Exception as e: print(stock + ' not refreshed:\n' + str(e)) if __name__ == '__main__': motifs = Motif.search_professional_motifs() stocks = set(['LNKD', 'TWTR', 'MSFT', 'GOOG', 'AMBA', 'FB', 'AAPL', 'NFLX', 'PYPL', ]) for motif in motifs: print('motif:', motif) _stocks = Motif.get_stocks_from_motif(motif) for stock in _stocks: #print('stock:', stock) stocks.add(stock) print(len(stocks)) #style0 = xlwt.easyxf('font: name Times New Roman, color-index red, bold on', num_format_str='#,##0.00') #style1 = xlwt.easyxf(num_format_str='D-MMM-YY') #wb = xlwt.Workbook() #ws = wb.add_sheet('US Stock')