def match():
    """Queue a sample-matching job built from the submitted web form.

    Reads ``feature1``, ``feature2``, ``Species`` and ``inputEmail`` from
    ``request.form``, resolves the pickle/metadata paths from
    ``get_settings()`` and hands everything to ``CallMatch.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    _feature1 = request.form['feature1']
    _feature2 = request.form['feature2']
    _Species = request.form['Species']
    _inputEmail = request.form['inputEmail']

    settings = get_settings()
    encode_pkl = settings['Encode']
    roadmap_pkl = settings['Roadmap']
    GGRmap_pkl = settings['GGR']
    GSMGSE_pkl = settings['GSMGSE_pkl_path']

    # Users type "a, b, c"; trim the padding so keywords (and the output
    # prefix derived from the first one) carry no stray spaces.
    keywords1 = [k.strip() for k in _feature1.split(",")]
    keywords2 = [k.strip() for k in _feature2.split(",")]
    output_prefix1 = keywords1[0]
    output_prefix2 = keywords2[0]
    output_path = '../tmp/'
    type_seq1 = 'chip-seq'
    type_seq2 = 'chip-seq'

    # Fall back to human when the form leaves the species blank.
    species = _Species if _Species != '' else 'Homo sapiens'
    cwd = settings['Chipseq']

    # The positional flags below are passed through unchanged; their meaning
    # is defined by CallMatch, which is not visible in this block.
    CallMatch.delay(output_prefix1, output_prefix2, output_path,
                    keywords1, [], keywords2, [],
                    type_seq1, type_seq2,
                    True, True, False, None, False, None,
                    species, True, True,
                    encode_pkl, roadmap_pkl, GGRmap_pkl, GSMGSE_pkl,
                    cwd, 20, _inputEmail)
    return 'We are processing your request, results will be sent to your email'
def connectToGEO(user='******', ftpAddress='ftp.ncbi.nlm.nih.gov'):
    """Open an FTP session to the GEO server and log in.

    The login password is the ``email`` entry returned by ``get_settings()``.

    Args:
        user: FTP login name.
        ftpAddress: host name of the FTP server to connect to.

    Returns:
        The logged-in ``FTP`` connection object.
    """
    # NOTE(review): the default user looks masked in this copy of the file —
    # confirm the intended login name.
    password = get_settings()['email']
    connection = FTP(ftpAddress)
    connection.login(user, password)
    return connection
def query():
    """Queue an ID-lookup job for the uploaded ID list.

    Reads the uploaded ``IDlist`` file (one ID per line) and the
    ``inputEmail`` form field, then hands the IDs and the pickle paths from
    ``get_settings()`` to ``CallQuery.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    upload = request.files['IDlist']
    _inputEmail = request.form['inputEmail']

    # Close the upload even if reading it fails.
    try:
        stripped = [line.strip() for line in upload.readlines()]
    finally:
        upload.close()
    # Blank lines would otherwise be queried as empty IDs.
    id_list = [entry for entry in stripped if entry]

    output_path = '../tmp/query.txt'
    settings = get_settings()
    GSMGSE_pkl = settings['GSMGSE_pkl_path']
    GSM_SRR_pkl = settings['GSMtoSRRpkl']
    CallQuery.delay(id_list, output_path, GSMGSE_pkl, GSM_SRR_pkl, _inputEmail)
    return 'We are processing your request, results will be sent to your email'
def search():
    """Queue a keyword search job built from the submitted web form.

    Reads ``searchterms``, ``Species`` and ``inputEmail`` from
    ``request.form`` and hands them, together with the ``Chipseq`` setting,
    to ``CallSearch.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    _searchterms = request.form['searchterms']
    _Species = request.form['Species']
    _inputEmail = request.form['inputEmail']

    # Fall back to human when the form leaves the species blank.
    species = _Species if _Species != '' else 'Homo sapiens'
    settings = get_settings()

    # Trim padding around each comma-separated term so "a, b" yields "b",
    # not " b"; the first term doubles as the output file prefix.
    keywords = [k.strip() for k in _searchterms.split(",")]
    output_prefix = keywords[0]
    output_path = './tmp/'
    cwd = settings['Chipseq']

    CallSearch.delay(output_prefix, output_path, keywords, species, cwd,
                     _inputEmail)
    return 'We are processing your request, results will be sent to your email'
def search():
    """Queue a keyword search job with optional sample-source filters.

    Reads ``searchterms``, ``Species``, ``inputEmail`` and the
    comma-separated ``CellLines``, ``CellTypes``, ``Organs`` and ``Tissues``
    fields from ``request.form``. The output prefix is the first search term
    followed by the first non-empty entry of each filter list, joined by
    ``_``. Everything is handed to ``CallSearch.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    _searchterms = request.form['searchterms']
    _Species = request.form['Species']
    _inputEmail = request.form['inputEmail']

    # Fall back to human when the form leaves the species blank.
    species = _Species if _Species != '' else 'Homo sapiens'

    _CellLines = [k.strip() for k in request.form['CellLines'].split(",")]
    _CellTypes = [k.strip() for k in request.form['CellTypes'].split(",")]
    _Organs = [k.strip() for k in request.form['Organs'].split(",")]
    _Tissues = [k.strip() for k in request.form['Tissues'].split(",")]

    settings = get_settings()
    keywords = [k.strip() for k in _searchterms.split(",")]

    output_prefix = keywords[0]
    # The original guard compared len(...) with [] (always true); test the
    # list itself and its first entry instead.
    for terms in (_CellLines, _CellTypes, _Organs, _Tissues):
        if terms and terms[0] != '':
            output_prefix += '_' + terms[0]

    output_path = './tmp/'
    cwd = settings['Chipseq']
    CallSearch.delay(output_prefix, output_path, keywords, species, cwd,
                     _CellLines, _CellTypes, _Organs, _Tissues,
                     _inputEmail=_inputEmail)
    return 'We are processing your request, results will be sent to your email'
def match():
    """Queue a sample-matching job for two keyword lists from the web form.

    Reads ``feature1``, ``feature2``, ``Species`` and ``inputEmail`` from
    ``request.form`` and hands them, together with the ``Chipseq`` setting,
    to ``CallMatch.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    _feature1 = request.form['feature1']
    _feature2 = request.form['feature2']
    _Species = request.form['Species']
    _inputEmail = request.form['inputEmail']
    settings = get_settings()

    # Trim padding around each comma-separated term; the first term of each
    # list doubles as that side's output file prefix.
    keywords1 = [k.strip() for k in _feature1.split(",")]
    keywords2 = [k.strip() for k in _feature2.split(",")]
    output_prefix1 = keywords1[0]
    output_prefix2 = keywords2[0]
    output_path = './tmp/'

    # Fall back to human when the form leaves the species blank.
    species = _Species if _Species != '' else 'Homo sapiens'
    cwd = settings['Chipseq']

    CallMatch.delay(output_prefix1, output_prefix2, output_path,
                    keywords1, keywords2, species, cwd, _inputEmail)
    return 'We are processing your request, results will be sent to your email'
def search():
    """Queue a keyword search job using the pickle paths from the settings.

    Reads ``searchterms``, ``Species`` and ``inputEmail`` from
    ``request.form``; the species is forwarded exactly as submitted (no
    default is applied here). The job is handed to ``CallSearch.delay``.

    Returns:
        str: the acknowledgement message shown to the user.
    """
    raw_terms = request.form['searchterms']
    species_name = request.form['Species']
    recipient = request.form['inputEmail']

    conf = get_settings()
    encode_path = conf['Encode']
    roadmap_path = conf['Roadmap']
    ggr_path = conf['GGR']
    gsm_gse_path = conf['GSMGSE_pkl_path']

    terms = raw_terms.split(",")
    metadata_dir = conf['Chipseq']

    # Positional order follows the original call: prefix, output path,
    # keywords, keywords_begin, type_seq, ignorecase, geo, geo_file, species,
    # encode_remove, roadmap_remove, pickle paths, cwd, process, email.
    CallSearch.delay(
        terms[0],       # output prefix is the first search term
        '../tmp/',
        terms,
        [],
        'chip-seq',
        True,
        False,
        None,
        species_name,
        True,
        True,
        encode_path,
        roadmap_path,
        ggr_path,
        gsm_gse_path,
        metadata_dir,
        20,
        recipient,
    )
    return 'We are processing your request, results will be sent to your email'
def update():
    """Reload the stored lookup maps, refresh them and save them back.

    Loads the five maps named in ``get_settings()``, refreshes four of them
    through ``updateGSMGSE_Encode_Roadmap`` and the GSM-to-SRR map through
    ``updateGSMSRR``, then writes all five back with ``save_obj``.
    """
    cfg = get_settings()

    gsm_gse = load_obj(cfg['GSMGSE_pkl_path'])
    gsm_srr = load_obj(cfg['GSMtoSRRpkl'])
    encode = load_obj(cfg['Encode'])
    roadmap = load_obj(cfg['Roadmap'])
    ggr = load_obj(cfg['GGR'])

    # The settings store a missing metadata path as the literal text "None".
    metadata_dir = cfg["MetaData"]
    metadata_dir = None if metadata_dir == "None" else metadata_dir

    refreshed = updateGSMGSE_Encode_Roadmap(gsm_gse, encode, roadmap, ggr,
                                            metadata_dir)
    gsm_gse, encode, roadmap, ggr, stale_gsms = refreshed
    gsm_srr = updateGSMSRR(gsm_srr, stale_gsms)

    # [:-4] drops the last four characters of each configured path —
    # presumably the ".pkl" suffix that save_obj adds back; confirm.
    to_save = (
        (gsm_gse, 'GSMGSE_pkl_path'),
        (encode, 'Encode'),
        (roadmap, 'Roadmap'),
        (ggr, 'GGR'),
        (gsm_srr, 'GSMtoSRRpkl'),
    )
    for mapping, key in to_save:
        save_obj(mapping, cfg[key][:-4])
def CIG_grid():
    """Command-line entry point for the ``grid`` sub-command.

    Builds an argparse parser for the grid (parameter optimization) options,
    prints usage/help when requested, and otherwise parses ``sys.argv``.

    Returns 1 when too few arguments are given or parsing fails, 0 after
    printing help, and None after the final call completes.

    NOTE(review): the code that runs after a successful parse reads
    attributes (``feature_key_words``, ``keywords_begin``, ``type_seq``,
    ``ignorecase``, ``geo``, ``geo_file``, ``species``, ``MetaData``,
    ``process``) that this parser never defines, so it raises AttributeError
    at ``args.feature_key_words``. It appears to be copied from the search
    entry point; the intended grid logic needs to be confirmed.
    """
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # With fewer than three argv entries and no help flag, print the
        # short usage text and signal failure.
        print "\nusage:\n\npython danpos.py grid [optional arguments] <target_table1> <target_table2> <danpos_result_path> <features> <output_prefix> <output_path>\n\nfor more help, please try: python danpos.py grid -h\n"
        return 1
    # NOTE(review): the usage text lists six positionals, but the parser
    # below declares ten (command plus nine more).
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage=
        "\n\npython danpos.py grid [optional arguments] <target_table1> <target_table2> <danpos_result_path> <features> <output_prefix> <output_path>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument('command', default=None, help="set as 'grid' to perform parameter optimization")
    parser.add_argument(
        'target_table1',
        default=None,
        help=
        "The first table of genes, containing the columns at least 'gene_id', 'sample_prefix', table delimiter is recognized by the file surfix (csv, tsv, txt, xls, xlsx)",
    )
    parser.add_argument(
        'target_table2',
        default=None,
        help=
        "The second table of genes, containing the columns at least 'gene_id', 'sample_prefix', table delimiter is recognized by the file surfix (csv, tsv, txt, xls, xlsx)"
    )
    parser.add_argument(
        'danpos_result_path',
        default=None,
        help=
        "folder containing the danpos peak calling result tables, make sure the tables startswith sample_prefix and with '_' as delimiter in the name"
    )
    parser.add_argument('features', default=None, help="")
    parser.add_argument('output_prefix', default=None, help="the prefix for output files")
    parser.add_argument('output_path', default=None, help="the output path")
    parser.add_argument(
        'up_stream_grid',
        default=None,
        help=
        "the optimization grid for upstream distance, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )
    parser.add_argument(
        'down_stream_grid',
        default=None,
        help=
        "the optimization grid for downstream distance, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )
    parser.add_argument(
        'height_grid',
        default=None,
        help=
        "the optimization grid for height, in the format:'10000:2:1000', meaning, the start grid is 10000, every iteration the grid shrink for 2 times, and the final grid need to be larger than 1000"
    )
    # Optional parameters.
    parser.add_argument('-f', dest='function', metavar='', default='wilcoxon', help="wilcoxon or fisher")
    args = None
    if '-h' in sys.argv or '--help' in sys.argv:
        # Help was requested: print the banner and the full help text.
        print "\Chipseqpair\n"
        parser.print_help()
        print "\n"
        return 0
    elif len(sys.argv) >= 3:
        try:
            args = parser.parse_args()
        except:
            # The bare except also catches the SystemExit argparse raises on
            # invalid arguments, so the function returns 1 instead of exiting.
            # NOTE(review): the hint below names the search command, not grid.
            print "\nfor more help, please try: python CSP.py search -h\n"
            return 1
    if args is not None:
        settings = get_settings()
        encode_pkl = settings['Encode']
        roadmap_pkl = settings['Roadmap']
        GGRmap_pkl = settings['GGR']
        GSMGSE_pkl = settings['GSMGSE_pkl_path']
        # NOTE(review): 'feature_key_words' is not declared on this parser
        # (the positional is named 'features'), so this line raises
        # AttributeError; the same applies to the other args.* reads below.
        keywords = args.feature_key_words.split(",")
        output_prefix = args.output_prefix
        if output_prefix is None:
            output_prefix = keywords[0]
        output_path = args.output_path
        if output_path is None:
            output_path = './search_output/'
            if not os.path.isdir(output_path):
                os.system("mkdir search_output")
        if args.keywords_begin == '':
            keywords_begin = []
        else:
            keywords_begin = args.keywords_begin.split(",")
        type_seq = args.type_seq
        ignorcase = args.ignorecase
        geo = args.geo
        geo_file = args.geo_file
        species = args.species
        encode_remove = 1
        roadmap_remove = 1
        cwd = args.MetaData
        process = args.process
        # Fall back to the configured metadata path when none was given.
        if cwd is None:
            cwd = settings['MetaData']
        # The settings may hold the literal text "None" for "no metadata".
        if cwd is None or cwd == "None":
            cwd = None
            encode_remove = True
            roadmap_remove = True
        SOFTQuickParser(output_prefix, output_path, keywords, keywords_begin, type_seq=type_seq, ignorecase=ignorcase, geo=geo, geofile=geo_file, output_type=species, encode_remove=encode_remove, roadmap_remove=roadmap_remove, encode_pkl=encode_pkl, roadmap_pkl=roadmap_pkl, GGRmap_pkl=GGRmap_pkl, GSMGSE_pkl=GSMGSE_pkl, cwd=cwd, process=process)
        return
    return 1
def _collect_ids(id_spec):
    """Return the unique, non-empty IDs named by *id_spec*, in first-seen order.

    *id_spec* is either the path of an existing file holding comma- and/or
    newline-separated IDs, or a comma-separated string of IDs.
    """
    if os.path.isfile(id_spec):
        with open(id_spec, "r") as handle:
            tokens = [tok for line in handle for tok in line.split(",")]
    else:
        tokens = id_spec.split(",")

    seen = set()
    ids = []
    for tok in tokens:
        tok = tok.strip()
        # Skip blanks (empty lines, trailing commas) and repeats.
        if tok and tok not in seen:
            seen.add(tok)
            ids.append(tok)
    return ids


def GCF_query():
    """Command-line entry point for the ``query`` sub-command.

    Parses ``sys.argv``, resolves the ID list (a file path or a
    comma-separated string) and calls ``GEO_query`` with the pickle paths
    from ``get_settings()``. The output defaults to ``query.txt`` in the
    current working directory.

    Returns:
        1 when too few arguments are given or parsing fails, 0 after
        printing help, and None after the query has run.
    """
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # Too few arguments and no help flag: show the short usage text.
        print("\nusage:\n\npython CSP.py query [optional arguments] <ID list> <outputpath>\n\nfor more help, please try: python CSP.py query -h\n")
        return 1

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="\n\npython CSP.py query [optional arguments] <ID list> <outputpath>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument(
        'command',
        default=None,
        help="set as 'query' to looking for samples' NGS sequencing information")
    parser.add_argument(
        'IDs',
        default=None,
        help="list of IDs need to used to looking for the NGS sequencing information, "
             "it could be a list of IDs separated by ',', or a file containing a list of IDs,"
             " accepted IDs: GSM, GSE, SRR, SRP, SRX, SAMN, SRP, ")
    parser.add_argument('-o', '--output_path', dest='output_path',
                        default=None, metavar='',
                        help="specify the output file name and path.")

    args = None
    if '-h' in sys.argv or '--help' in sys.argv:
        # Help was requested: print the banner and the full help text.
        print("\\Chipseqpair\n")
        parser.print_help()
        print("\n")
        return 0
    elif len(sys.argv) >= 3:
        try:
            args = parser.parse_args()
        except SystemExit:
            # argparse exits on invalid arguments; report and return instead.
            print("\nfor more help, please try: python CSP.py query -h\n")
            return 1

    if args is not None:
        output_path = args.output_path
        if output_path is None:
            output_path = os.getcwd() + "/query.txt"

        id_list = _collect_ids(args.IDs)

        settings = get_settings()
        GSMGSE_pkl = settings['GSMGSE_pkl_path']
        GSM_SRR_pkl = settings['GSMtoSRRpkl']
        GEO_query(id_list, output_path, GSMGSE_pkl, GSM_SRR_pkl)
        return
    return 1
def GCF_search():
    """Command-line entry point for the ``search`` sub-command.

    Parses ``sys.argv``, fills in defaults (output prefix = first keyword,
    output path = ``./search_output/``, metadata path from the settings) and
    calls ``SOFTQuickParser``. The output directory is created if missing.

    Returns:
        1 when too few arguments are given or parsing fails, 0 after
        printing help, and None after the search has run.
    """
    if (len(sys.argv) < 3) and ('-h' not in sys.argv) and ('--help' not in sys.argv):
        # Too few arguments and no help flag: show the short usage text.
        print("\nusage:\n\npython CSP.py search [optional arguments] <features> <output_prefix> <output_path>\n\nfor more help, please try: python CSP.py search -h\n")
        return 1

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        usage="\n\npython CSP.py search [optional arguments] <features> <output_prefix> <output_path>\n\n",
        description='',
        epilog="Chen lab, Houston Methodist")
    parser.add_argument(
        'command',
        default=None,
        help="set as 'search' to looking for samples with key words")
    parser.add_argument(
        'feature_key_words',
        default=None,
        help="list of feature key words need to used to looking for the NGS sequencing samples, "
             "different key words need to be separated by ','")

    # Optional parameters.
    parser.add_argument(
        '-b', dest='keywords_begin', metavar='', default='',
        help="list of key words in features need to be used to occur in the beginning of a word, "
             "different key words need to be separated by ','")
    parser.add_argument(
        '-t', '--type', dest='type_seq', metavar='', default='chip-seq',
        help="type of sequencing specified in the search. Default is 'chip-seq'")
    parser.add_argument(
        '-c', '--ignorecase', dest='ignorecase', metavar='', default=1, type=int,
        help="specify whether case (A vs a) need to be ignored in the search. Default is 1 which means case will be ignored in the search. Set to 0 if don't want to ignore the case. ")
    parser.add_argument(
        '--geo', dest='geo', default=0, type=int, metavar='',
        help="specify whether search will be limited to GEO website search result. Default is 0. Set to 1 if only want to correct GEO search results.")
    parser.add_argument(
        '--candidateslist', dest='geo_file', default=None, metavar='',
        help="specify the file path of GSM ID list if '--geo' set to 1")
    parser.add_argument(
        '-s', '--species', dest='species', default='Homo sapiens', metavar='',
        help="specify the samples' species. Default is Homo sapiens. Please use the species official name. For example, human is Homo sapiens. "
             "If the species name contains space, surround the name with double quotes, for example \"Homo sapiens\"")
    parser.add_argument('-m', '--metadata', dest='MetaData', default=None,
                        metavar='',
                        help="specify the GSMs metadata files path")
    parser.add_argument(
        '-p', '--process', dest='process', default=20, type=int, metavar='',
        help="specify the number of parallel search processes want to use.")
    parser.add_argument('-f', '--output_prefix', dest='output_prefix',
                        default=None, metavar='',
                        help="specify the output file prefix.")
    parser.add_argument('-o', '--output_path', dest='output_path',
                        default=None, metavar='',
                        help="specify the output file location.")

    args = None
    if '-h' in sys.argv or '--help' in sys.argv:
        # Help was requested: print the banner and the full help text.
        print("\\Chipseqpair\n")
        parser.print_help()
        print("\n")
        return 0
    elif len(sys.argv) >= 3:
        try:
            args = parser.parse_args()
        except SystemExit:
            # argparse exits on invalid arguments; report and return instead.
            print("\nfor more help, please try: python CSP.py search -h\n")
            return 1

    if args is not None:
        settings = get_settings()
        encode_pkl = settings['Encode']
        roadmap_pkl = settings['Roadmap']
        GGRmap_pkl = settings['GGR']
        GSMGSE_pkl = settings['GSMGSE_pkl_path']

        keywords = args.feature_key_words.split(",")
        output_prefix = args.output_prefix
        if output_prefix is None:
            output_prefix = keywords[0]

        output_path = args.output_path
        if output_path is None:
            output_path = './search_output/'
        # Create whichever directory will actually receive the output,
        # without shelling out.
        if not os.path.isdir(output_path):
            os.makedirs(output_path)

        if args.keywords_begin == '':
            keywords_begin = []
        else:
            keywords_begin = args.keywords_begin.split(",")

        type_seq = args.type_seq
        ignorcase = args.ignorecase
        geo = args.geo
        geo_file = args.geo_file
        species = args.species
        encode_remove = 1
        roadmap_remove = 1
        cwd = args.MetaData
        process = args.process

        # Fall back to the configured metadata path when none was given.
        if cwd is None:
            cwd = settings['MetaData']
        # The settings may hold the literal text "None" for "no metadata".
        if cwd is None or cwd == "None":
            cwd = None
            encode_remove = True
            roadmap_remove = True

        SOFTQuickParser(output_prefix, output_path, keywords, keywords_begin,
                        type_seq=type_seq, ignorecase=ignorcase, geo=geo,
                        geofile=geo_file, output_type=species,
                        encode_remove=encode_remove,
                        roadmap_remove=roadmap_remove,
                        encode_pkl=encode_pkl, roadmap_pkl=roadmap_pkl,
                        GGRmap_pkl=GGRmap_pkl, GSMGSE_pkl=GSMGSE_pkl,
                        cwd=cwd, process=process)
        return
    return 1
for gse in sampleSeriesID: if gse in excludeGSEs: exclude = True if not exclude and sampleLibraryStrategy.lower() == 'chip-seq': results.append(sample) for gse_id in sample.series: GSE_GSM[gse_id].add(sample.id) queue.put((results, GSE_GSM)) if db is not None: db.close() settings = get_settings() encode_pkl = settings['Encode'] roadmap_pkl = settings['Roadmap'] GGRmap_pkl = settings['GGR'] encodeGSE = load_obj(encode_pkl) roadmapGSE = load_obj(roadmap_pkl) GGRmapGSE = load_obj(GGRmap_pkl) excludedGSE = set() excludedGSE = excludedGSE.union(encodeGSE) excludedGSE = excludedGSE.union(roadmapGSE) excludedGSE = excludedGSE.union(GGRmapGSE)