# Python 2 code; assumes module-level `import os, string` plus the helpers
# importEnsemblUniprot, import_uniprot_db, export and exportEnsemblUniprot
# defined elsewhere in this module.
def runExtractUniProt(species,species_full,uniprot_filename_url,trembl_filename_url,force):
    global uniprot_ensembl_db; uniprot_ensembl_db={}
    global uniprot_db; uniprot_db={}
    global species_name; global uniprot_fildir
    global secondary_to_primary_db; secondary_to_primary_db={}
    import update; reload(update)
    species_name = species_full

    import UI; species_names = UI.getSpeciesInfo()
    species_full = species_names[species]
    species_full = string.replace(species_full,' ','_')

    ### Derive local filenames and directories from the download URLs
    uniprot_file = string.split(uniprot_filename_url,'/')[-1]; uniprot_file = string.replace(uniprot_file,'.gz','')
    trembl_file = string.split(trembl_filename_url,'/')[-1]; trembl_file = string.replace(trembl_file,'.gz','')
    uniprot_fildir = 'AltDatabase/uniprot/'+species+'/'
    uniprot_download_fildir = 'AltDatabase/uniprot/'
    uniprot_ens_file = species+'_Ensembl-UniProt.txt'; uniprot_ens_location = uniprot_fildir+uniprot_ens_file
    uniprot_location = uniprot_download_fildir+uniprot_file
    trembl_location = uniprot_download_fildir+trembl_file
    add_trembl_annotations = 'no' ### Currently we don't need these annotations

    try: importEnsemblUniprot(uniprot_ens_location)
    except IOError:
        try:
            ### Download the data from the AltAnalyze website (if there)
            update.downloadCurrentVersion(uniprot_ens_location,species,'txt')
            importEnsemblUniprot(uniprot_ens_location)
        except Exception: pass
        try:
            ### Fall back to the legacy Uniprot-SWISSPROT naming convention
            uniprot_ens_location_built = string.replace(uniprot_ens_location,'UniProt','Uniprot-SWISSPROT')
            uniprot_ens_location_built = string.replace(uniprot_ens_location_built,'uniprot','Uniprot-SWISSPROT')
            importEnsemblUniprot(uniprot_ens_location_built)
        except Exception: pass

    ### Import UniProt annotations
    counts = update.verifyFile(uniprot_location,'counts')
    if force == 'no' or counts > 8:
        import_uniprot_db(uniprot_location)
    else:
        ### Directly download the data from UniProt
        gz_filepath, status = update.download(uniprot_filename_url,uniprot_download_fildir,'')
        if status == 'not-removed':
            try: os.remove(gz_filepath) ### Not sure why this works now and not before
            except OSError: pass
        import_uniprot_db(uniprot_location)

    if add_trembl_annotations == 'yes':
        ### Import TreMBL annotations
        try:
            if force == 'yes': uniprot_location += '!!!!!' ### Force an IOError
            import_uniprot_db(trembl_location)
        except IOError:
            ### Directly download the data from UniProt
            update.download(trembl_filename_url,uniprot_download_fildir,'')
            import_uniprot_db(trembl_location)
    export()
    exportEnsemblUniprot(uniprot_ens_location)
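# A minimal invocation sketch for the function above. The species values and
# URLs are illustrative placeholders, not AltAnalyze's configured defaults.
species = 'Hs'
species_full = 'Homo sapiens'
uniprot_url = 'https://example.org/uniprot_sprot_human.dat.gz'   # assumed mirror
trembl_url = 'https://example.org/uniprot_trembl_human.dat.gz'   # assumed mirror
runExtractUniProt(species, species_full, uniprot_url, trembl_url, force='no')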
# Assumes module-level `import argparse` and the command handlers
# (arasent_import, tokenize, main_subsample, prep_cv, export) defined elsewhere.
def main(argv):
    parser = argparse.ArgumentParser(description="Utilities for Arabic Sentiment Analysis")
    subparsers = parser.add_subparsers(dest="cmd", help="Commands")

    import_parser = subparsers.add_parser("import", help="Import files to IR")
    import_parser.add_argument("-t", dest="type", choices=["csv", "xml", "flat"], required=True)
    import_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    import_parser.add_argument("input", help="Input file")

    tok_parser = subparsers.add_parser("tok", help="Tokenize and morphologically analyze documents in IR format")
    tok_parser.add_argument("-l", dest="lang", choices=["en", "ar"], default="ar", help="Tokenization language")
    tok_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    tok_parser.add_argument("input", help="Input IR file")

    subsample_parser = subparsers.add_parser("sub", help="Subsample corpus in IR format")
    subsample_parser.add_argument("-n", dest="num", type=int, required=True, help="Number of documents to sample")
    subsample_parser.add_argument("-o", dest="output", required=True, help="Output IR file")
    subsample_parser.add_argument("input", help="Input IR file")

    cv_parser = subparsers.add_parser("cv", help="Generate CV folds from IR format")
    cv_parser.add_argument("-n", dest="num", type=int, required=True, help="Number of folds")
    cv_parser.add_argument("-s", dest="split", type=float, required=True, help="Train/test split in [0, 1]")
    cv_parser.add_argument("-o", dest="output", required=True, help="Output directory")
    cv_parser.add_argument("input", help="Input IR file")

    export_parser = subparsers.add_parser("export", help="Export from IR to desired format")
    export_parser.add_argument("-c", dest="cv", action="store_true", help="Treat the input as cross-validation folds")
    export_parser.add_argument("-l", dest="lang", default="ar", help="Language (en, ar)")
    export_parser.add_argument("-n", dest="name", default="arbooks", help="Corpus name")
    export_parser.add_argument("-o", dest="output", required=True, help="Output file or directory")
    export_parser.add_argument("-t", dest="type", choices=["itm", "shlda", "svml", "mlt"], required=True)
    export_parser.add_argument("input", help="Input IR file")

    arguments = parser.parse_args(argv)
    if arguments.cmd == "import":
        arasent_import(arguments.type, arguments.input, arguments.output)
    elif arguments.cmd == "tok":
        tokenize(arguments.input, arguments.output, arguments.lang)
    elif arguments.cmd == "sub":
        main_subsample(arguments.num, arguments.input, arguments.output)
    elif arguments.cmd == "cv":
        prep_cv(arguments.input, arguments.output, arguments.num, arguments.split)
    elif arguments.cmd == "export":
        export(arguments.cv, arguments.type, arguments.input, arguments.output, arguments.name, arguments.lang)
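# A conventional entry point for the CLI above -- a sketch only; the module may
# already wire this up elsewhere.
if __name__ == "__main__":
    import sys
    main(sys.argv[1:])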
# Timeout and HTTPError are assumed to be imported from the HTTP library in use
# (e.g. requests.exceptions).
def start(self, gauge):
    global PROGRESS_MAX
    count = 0
    while count < PROGRESS_MAX and gauge.isValid():
        wurl = self.url + '&start=' + str(count * 15)  # results are paged 15 per request
        try:
            getContent(wurl, self.rankList)
        except Timeout:
            continue  # retry the same page; count is not advanced on timeout
        except HTTPError:
            break
        wx.CallAfter(gauge.UpdateGauge, count, "%i of %i" % (count, PROGRESS_MAX))
        count += 1
    if gauge.isValid():
        filepath = '../export_files/booklist_of_' + self.key_word
        export(self.rankList, filepath)
        box = wx.MessageDialog(None, 'Done!', 'Successfully Exported', wx.OK)
        box.ShowModal()
        box.Destroy()
        gauge.Destroy()
def start(self, gauge):
    global counter
    index = 0
    counter = 0  # shared progress counter, presumably updated inside getContent
    # Fetch three page ranges in parallel; each worker starts 10 results further in.
    thread0 = threading.Thread(target=self.getContent, args=(index, gauge))
    index += 10
    thread1 = threading.Thread(target=self.getContent, args=(index, gauge))
    index += 10
    thread2 = threading.Thread(target=self.getContent, args=(index, gauge))
    thread0.start()
    thread1.start()
    thread2.start()
    thread0.join()
    thread1.join()
    thread2.join()
    if gauge.isValid():
        filepath = '../export_files/booklist_of_' + self.key_word
        export(self.rankList, filepath)
        box = wx.MessageDialog(None, 'Done!', 'Successfully Exported', wx.OK)
        box.ShowModal()
        box.Destroy()
        gauge.Destroy()
## TODO: make note import use an array later and change GenerateNote to accept
## arrays only; use *argv or something.
## Assumes `import time` and `start_time = time.time()` near the top of the script.
#newInstrument()
#instrument1()  # Change later to GenerateInstrument(number of instruments)
#addInstrumentToSong()
#instrument2()
#GenerateNote(64, 1567.98)
#GenerateNote(16, note("80"))
#addInstrumentToSong()
GenerateNote(instrumentC, [[32, [note("rest")]], [32, [note("C4")]], [32, [note("C5")]]])
#GenerateNote(randomInstrument(20), randomNotes(16, 16))
export()

#### Execution time ####
print "My program took", time.time() - start_time, "seconds to run (excluding plotting time)"
from evaluate import song
print "Computation time for song length", (len(song) / 44100) / (time.time() - start_time), "%"
#### Execution time ####
plot()
print ""

'''
When you try to divide a list by a real number, Python says "you are crazy!
You can't do that." An array (as in NumPy) is like a vector: if you divide it
by a real number, each element in it is divided by that number. This can be
super useful.
'''
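# A small illustration of the point in the note above (assumes NumPy is
# available; none of these names come from the original script):
import numpy as np
samples = np.array([0.5, 1.0, -0.25])
print samples / 2.0  # elementwise: [0.25, 0.5, -0.125]
# whereas [0.5, 1.0, -0.25] / 2.0 on a plain list raises a TypeError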
# Fragment: the tail of getContent's per-item loop, followed by the script's
# entry point. Assumes Python 2 with `import threading` and `from urllib import quote`.
    except AttributeError:
        rate = 0  # no rating available for this entry
    dic = {'title': title, 'pub': pubinfo, 'read': pl, 'rate': rate}
    lock.acquire()
    try:
        rankList.append(dic)  # rankList is shared across threads, so guard the append
    finally:
        lock.release()
    index += 1
    i += 1

if __name__ == '__main__':
    url = "https://book.douban.com/subject_search?search_text="
    key_word = raw_input('key word:')
    tag = quote(key_word.encode('utf-8'))
    url = url + tag
    rankList = []
    index = 0
    # Three workers scrape in parallel, each starting 10 results further in.
    thread0 = threading.Thread(target=getContent, args=(url, index, rankList))
    index += 10
    thread1 = threading.Thread(target=getContent, args=(url, index, rankList))
    index += 10
    thread2 = threading.Thread(target=getContent, args=(url, index, rankList))
    thread0.start()
    thread1.start()
    thread2.start()
    thread0.join()
    thread1.join()
    thread2.join()
    export(rankList, key_word)
def test_export(tmp_path, example_data):
    # Exercise the exporter on the myriad cases parametrized in example_data.
    documents = example_data()
    export(documents, tmp_path)
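# A minimal sketch of fixtures that would satisfy test_export. `tmp_path` is
# pytest's built-in temporary-directory fixture; `example_data` below is an
# assumed shape, not the project's actual fixture.
import pytest

@pytest.fixture
def example_data():
    def _make():
        return [{"id": 1, "text": "hello"}]  # hypothetical document shape
    return _make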
# Fragment: assembling the per-model inputs for the Excel report writers; the
# PD dict opens (and the CCF dict closes) outside this excerpt.
        PD_s_dev,
    "AUC": [
        PD_AUC_dev, PD_AUC_val, PD_s_val, PD_AUC_S, PD_AUC_p,
        "yes", 0, 0, 0, PD_s_dev
    ],
    "customer_migrations": [upper_MWB, lower_MWB],
    "concentration_rating_grades": [HI_init, HI_curr, cr_pval, HI_curr_exp],
    "stability_migration_matrix": [transition_matrix_freq, z, z_pval],
    "avg_PD": development_set.groupby("grade").PD.mean().values,
    "nb_cust": development_set.grade.value_counts().sort_index().values,
    "orgExp_Grade": development_set.groupby("grade").original_exposure.sum().values,
}
export().PD_toExcel(PD_excel_input)

LGD_excel_inputs = {
    "predictive_ability": [LGD_backtesting_ptf, LGD_backtesting_perGrade],
    "AUC": [
        LGD_gAUC_init, LGD_gAUC_curr, LGD_S, LGD_curr_var, LGD_init_var, LGD_p_val
    ],
    "stability_migration_matrix": [z_up, z_low, zUP_pval, zDOWN_pval],
}
export().LGD_toExcel(development_set, LGD_excel_inputs)

CCF_excel_inputs = {
    "predictive_ability": [CCF_backtesting_ptf, CCF_backtesting_perGrade],
    "AUC": [
        CCF_gAUC_init, CCF_gAUC_curr, CCF_S, CCF_curr_var, CCF_p_val,