def main(args):
    """Predict topics for the track dataset pickled at GIT_FILE_PATH and write results.

    Args:
        args: parsed CLI namespace; uses ``args.output`` (destination) and
            ``args.format`` (output format) — passed through to ``show_results``.
    """
    logger.info('Reading track dataset...')
    git_df = pd.read_pickle(GIT_FILE_PATH)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned full text is the model input.
    repos = git_df['full_text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(repos)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(git_df, repos, topics, args.output, args.format)
def main(args):
    """Predict topics for the track dataset pickled at PROTOCOLS_FILE_PATH and write results.

    Args:
        args: parsed CLI namespace; uses ``args.output`` (destination) and
            ``args.format`` (output format) — passed through to ``show_results``.
    """
    logger.info('Reading track dataset...')
    protocols_df = pd.read_pickle(PROTOCOLS_FILE_PATH)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned full text is the model input.
    protocols = protocols_df['full_text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(protocols)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(protocols_df, protocols, topics, args.output, args.format)
def main(args):
    """Predict topics for articles loaded from ``args.input`` and write results.

    Args:
        args: parsed CLI namespace; uses ``args.input`` and ``args.isFile``
            (forwarded to ``load_articles_df``), plus ``args.output`` and
            ``args.format`` (forwarded to ``show_results``).
    """
    logger.info('Loading article data...')
    pmc_df = load_articles_df(args.input, args.isFile)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned text is the model input (note: 'text_cleaned', not 'full_text_cleaned').
    articles = pmc_df['text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(articles)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(pmc_df, articles, topics, args.output, args.format)
def main(args):
    """Predict topics for the track dataset pickled at PMC_FILE_PATH and write results.

    Args:
        args: parsed CLI namespace; uses ``args.output`` (destination) and
            ``args.format`` (output format) — passed through to ``show_results``.
    """
    logger.info('Reading track dataset...')
    pmc_df = pd.read_pickle(PMC_FILE_PATH)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned text is the model input (note: 'text_cleaned', not 'full_text_cleaned').
    articles = pmc_df['text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(articles)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(pmc_df, articles, topics, args.output, args.format)
def main(args):
    """Predict topics for repositories loaded from ``args.input`` and write results.

    Args:
        args: parsed CLI namespace; uses ``args.input``, ``args.isFile`` and
            ``args.token`` (forwarded to ``load_repos_df`` — presumably an API
            token; confirm against that helper), plus ``args.output`` and
            ``args.format`` (forwarded to ``show_results``).
    """
    logger.info('Loading repository data...')
    git_df = load_repos_df(args.input, args.isFile, args.token)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned full text is the model input.
    repos = git_df['full_text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(repos)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(git_df, repos, topics, args.output, args.format)
def main(args):
    """Predict topics for protocols loaded from ``args.input`` and write results.

    Args:
        args: parsed CLI namespace; uses ``args.input`` and ``args.isFile``
            (forwarded to ``load_protocols_df``), plus ``args.output`` and
            ``args.format`` (forwarded to ``show_results``).
    """
    logger.info('Loading protocol data...')
    protocols_df = load_protocols_df(args.input, args.isFile)
    logger.info('Loading topic extraction model...')
    final_pipe = load_final_pipe()
    # Pre-cleaned full text is the model input.
    protocols = protocols_df['full_text_cleaned'].values
    logger.info('Predicting topics...')
    topics = final_pipe.transform(protocols)
    # Fixed typo in log message: "Writting" -> "Writing".
    logger.info('Writing results...')
    show_results(protocols_df, protocols, topics, args.output, args.format)
from timeit import Timer
# Fixed: `logging` and `ThreadPool` were used below but never imported.
import logging
from multiprocessing.pool import ThreadPool

import common

# Include the worker thread name in each record so interleaving is visible.
logging.basicConfig(level=logging.DEBUG,
                    format='[%(levelname)s] (%(threadName)s) %(message)s '
                    )


def makePool_debug(num_threads):
    """Log every entry of ``common.files`` via a pool of ``num_threads`` workers."""
    pool = ThreadPool(num_threads)
    results = pool.map(logging.debug, common.files)
    pool.close()
    pool.join()


def secvential_debug(files):
    """Log every entry of ``files`` one by one (sequential baseline)."""
    for file in files:
        logging.debug(file)


if __name__ == "__main__":
    # NOTE(review): repeat=1/number=1 yields a single noisy sample, so the
    # surrounding min() is a no-op; kept as-is to preserve behavior.
    t = Timer("makePool_debug(%s)" % common.number_of_threads,
              "from __main__ import makePool_debug")
    result_thredead_debug = min(t.repeat(repeat=1, number=1))
    t = Timer("secvential_debug(%s)" % common.files,
              "from __main__ import secvential_debug")
    result_secvential_debug = min(t.repeat(repeat=1, number=1))
    print(common.show_results("thredead_debug(%s threads)" % common.number_of_threads,
                              result_thredead_debug))
    print(common.show_results("secvential_debug() : ", result_secvential_debug))
# Fixed: `Timer` and `ThreadPool` were used below but never imported.
from timeit import Timer
from multiprocessing.pool import ThreadPool

import common


def makePool_count(num_threads):
    """Fan ``secvential_count`` out over ``common.files`` with a thread pool.

    NOTE(review): ``pool.map`` invokes ``secvential_count`` once PER file, so
    each worker counts the characters of a single filename, whereas the
    sequential baseline counts the files once — the two sides do not measure
    the same work. Confirm intent before comparing the timings.
    """
    pool = ThreadPool(num_threads)
    results = pool.map(secvential_count, common.files)
    pool.close()
    pool.join()


def secvential_count(files):
    """Count the items of ``files``; the result is discarded (timing only)."""
    total = 0
    for _ in files:
        total += 1


if __name__ == "__main__":
    t = Timer("makePool_count(%s)" % common.number_of_threads,
              "from __main__ import makePool_count")
    result_thredead_count = min(t.repeat(repeat=1, number=1))
    t = Timer("secvential_count(%s)" % common.files,
              "from __main__ import secvential_count")
    result_secvential_count = min(t.repeat(repeat=1, number=1))
    print(
        common.show_results(
            "thredead_count(%s threads)" % common.number_of_threads,
            result_thredead_count))
    print(common.show_results("secvential_count()", result_secvential_count))
pool = ThreadPool(num_threads) results = pool.map(print, files) pool.close() pool.join() def secvential_print(files): for file in files: print(file) if __name__ == "__main__": repeat = 1 number = 1 number_of_threads = common.number_of_threads files = common.files t = Timer("makePool_print(%s)" % number_of_threads, "from __main__ import makePool_print") result_thredead_print = min(t.repeat(repeat=repeat, number=number)) t = Timer("secvential_print(%s)" % files, "from __main__ import secvential_print") result_secvential_print = min(t.repeat(repeat=repeat, number=number)) print( common.show_results("thredead_print(%s threads)" % number_of_threads, result_thredead_print)) print(common.show_results("secvential_print() : ", result_secvential_print))