def load_gff_file():
    """Create the GO table and load the reference GO file into ``gotable``.

    The reference file named by ``reference_go`` is copied from ``../data``
    into the local ``data`` directory. The copy is handed to
    ``delete_first_line`` (presumably dropping a header row -- verify against
    that helper) and then loaded with ``load_table``.
    """
    create_go_table()

    source_path = os.path.abspath(
        os.path.join(os.pardir, 'data', reference_go))
    local_copy = os.path.abspath(os.path.join('data', reference_go))

    # All further processing is done on the local copy, not on the source.
    shutil.copyfile(source_path, local_copy)
    delete_first_line(local_copy)
    load_table('gotable', local_copy)
def load_promoters_to_mysql(self):
    """Create and load one promoter table per size suffix.

    For each suffix in 10000, 5000, 2000, 1000 and 500 the table
    ``promoter_<suffix>`` is created with ``create_promoter_table`` and then
    loaded from ``data/promoters.sql.<suffix>.txt`` with ``load_table``.
    The file/table pair is printed before each load.
    """
    for suffix in ('10000', '5000', '2000', '1000', '500'):
        table_name = "promoter_{0}".format(suffix)
        source_file = 'data/promoters.sql.{0}.txt'.format(suffix)
        print(source_file, table_name)
        create_promoter_table(table_name)
        load_table(table_name, source_file)
def get_first_line():
    """Publish the ortholog header columns to PHP and create the table.

    Reads the first line of the reference ortholog file in ``../data``,
    splits it on tabs and keeps every column whose name contains neither
    ``gene_id`` nor ``_desc``. The kept names are written as a PHP array to
    ``../bin/php/settings/ortholog.php`` and passed to ``create_ortholog``.

    Raises:
        OSError: if the reference file cannot be read or the PHP settings
            file cannot be written. (Previously a missing reference file was
            silently turned into a single empty column name.)
        UnicodeDecodeError: if the header line is not valid UTF-8.
    """
    src = os.path.abspath(os.path.join(os.pardir, 'data', reference_ortholog))

    # Binary readline() ends a line at "\n" only, which is exactly what the
    # former `head -n 1` subprocess returned, without needing an external tool.
    with open(src, 'rb') as handle:
        header = handle.readline().decode("utf-8")

    # rstrip() takes a set of characters, not a regex: the old "\r|\n" also
    # removed a trailing "|" from the last column name.
    header = header.rstrip("\r\n")

    ortho = [
        col for col in header.split("\t")
        if 'gene_id' not in col and '_desc' not in col
    ]

    settings_path = os.path.abspath(
        os.path.join(os.pardir, 'bin', 'php', 'settings', 'ortholog.php'))
    php = "<?php\n$ortholog = ['{0}'];\n?>".format("','".join(ortho))
    with open(settings_path, "w") as outfile:
        outfile.write(php)

    create_ortholog(ortho)


if __name__ == '__main__':
    get_first_line()
    a = os.path.abspath(os.path.join(os.pardir, 'data', reference_ortholog))
    load_table('ortholog', a)
print(filename) ## define original source src = os.path.join(rnaseq_dir, filename) ## define path of destination dest = os.path.abspath(os.path.join('data', filename)) ## copy source to destination shutil.copyfile(src, dest) ## delete the first line delete_first_line(dest) ## remove inf and -inf and replace it with 100000 parse_infinity(dest) ## Make a R file with only the experiments a = create_R_file(dest) ## Get the experiments list print(a) ## Collect the R file list outfiles.append(dest) create_rnaseq_table() for file in outfiles: load_table('rnaseq', file) expslist, hash = combine_rnaseq(outfiles) rnaseq_combine_file = os.path.abspath( os.path.join('data', 'rnaseq.combine')) rnaseqcombine(expslist, hash, rnaseq_combine_file) update_rnaseq(expslist, rnaseq_combine_file) delete_first_line(rnaseq_combine_file) create_rnaseq_expression_table(expslist) load_table('rnaseq_expression', rnaseq_combine_file)
from mysql_tables import create_pathway_table, load_table
sys.path.append(os.path.abspath('../'))
from data import *


def genfile(boo):
    """Write the PHP settings file that flags pathway availability.

    Args:
        boo: text inserted verbatim as the value of ``$available_pathway``
            (the callers in this file pass ``'TRUE'`` or ``'FALSE'``).

    The file is written to ``../bin/php/settings/pathway.php`` relative to
    the current working directory and replaces any existing content.
    """
    settings_path = os.path.abspath(
        os.path.join(os.pardir, 'bin', 'php', 'settings', 'pathway.php'))
    php = "<?php\n$available_pathway = {0};\n?>".format(boo)
    with open(settings_path, "w") as outfile:
        outfile.write(php)


def add_description():
    """Report that pathway data is present and set the PHP flag to TRUE."""
    print('add pathway')
    genfile('TRUE')


def no_description():
    """Report that pathway data is absent and set the PHP flag to FALSE."""
    print('no pathway')
    genfile('FALSE')


if __name__ == '__main__':
    print(reference_pathway)
    if reference_pathway:
        # A pathway file is configured: create and load the table, then
        # tell the PHP side that pathway data is available.
        create_pathway_table()
        a = os.path.abspath(os.path.join(os.pardir, 'data', reference_pathway))
        load_table('pathway', a)
        add_description()
    else:
        no_description()
gene_id, gene_name = extract_gene_id_name(cols[8]) return gene_id, chr, gene_start, gene_end, gene_length, gene_strand, gene_name def parse_gff(filename, outfile): fh = open(filename) out = open(outfile, 'w') for rec in fh: if rec[0] == '#': continue rec = rec.rstrip("\r|\n") cols = rec.split("\t") if cols[2] != 'gene': continue if 'scaffold' in cols[0]: continue #print(">> ", rec) #print("\t".join(extract_gene_info(cols))) out.write("\t".join(extract_gene_info(cols)) + "\n") fh.close() out.close() if __name__ == '__main__': #filename = 'data/Homo_sapiens.GRCh37.75.gtf' #filename = 'data/test' #filename = 'data/Zea_mays.AGPv3.31.gtf' parse_gff('../data/' + reference_gtf, 'data/' + reference_gtf + '.parse') create_gff_table() load_table('gfftest', 'data/' + reference_gtf + '.parse')
def genfile(boo):
    """Write the PHP settings file that flags description availability.

    Args:
        boo: text inserted verbatim as the value of
            ``$available_description`` (the callers in this file pass
            ``'TRUE'`` or ``'FALSE'``).

    The file is written to ``../bin/php/settings/descriptions.php`` relative
    to the current working directory and replaces any existing content.
    """
    settings_path = os.path.abspath(
        os.path.join(os.pardir, 'bin', 'php', 'settings', 'descriptions.php'))
    php = "<?php\n$available_description = {0};\n?>".format(boo)
    with open(settings_path, "w") as outfile:
        outfile.write(php)


def add_description():
    """Report that descriptions are present and set the PHP flag to TRUE."""
    print('add description')
    genfile('TRUE')


def no_description():
    """Report that descriptions are absent and set the PHP flag to FALSE."""
    print('no description')
    genfile('FALSE')


if __name__ == '__main__':
    print(reference_description)
    if reference_description:
        # A description file is configured: create and load the table, then
        # tell the PHP side that descriptions are available.
        create_gene_descriptions()
        a = os.path.abspath(
            os.path.join(os.pardir, 'data', reference_description))
        load_table('gene_descriptions', a)
        add_description()
    else:
        no_description()
def parse_exp(filename, modfilename):
    """Turn a comma-separated file into a tab-separated one, tagged per line.

    Rewrites ``filename`` in place: every comma becomes a tab and a tab plus
    ``modfilename`` is appended to the end of every line.

    Args:
        filename: path of the file to rewrite.
        modfilename: label appended as the last column of each line.

    This is the pure-Python equivalent of the former
    ``sed -i "s/,/\\t/g;s/$/\\t<modfilename>/g" <filename>`` shell call. That
    command was built by string concatenation, so it broke on paths with
    spaces or quotes and on labels containing ``/``, ``&`` or ``\\``, and it
    relied on GNU sed's ``-i`` syntax.

    Raises:
        OSError: if the file cannot be read or written.
    """
    # Work on bytes so that content in any encoding passes through unchanged,
    # as it did with sed.
    suffix = b"\t" + os.fsencode(modfilename)

    with open(filename, "rb") as handle:
        lines = handle.readlines()

    rewritten = []
    for raw in lines:
        # Like sed, only "\n" ends a line, and a missing final newline is
        # left missing.
        if raw.endswith(b"\n"):
            rewritten.append(raw[:-1].replace(b",", b"\t") + suffix + b"\n")
        else:
            rewritten.append(raw.replace(b",", b"\t") + suffix)

    with open(filename, "wb") as handle:
        handle.writelines(rewritten)


if __name__ == '__main__':
    chipseq_dir = os.path.abspath(os.path.join(os.pardir, 'data_chipseq'))
    # NOTE(review): this pattern is not anchored at the end, so names such as
    # "x.csv.bak" are picked up too -- confirm whether that is intended.
    files = [f for f in os.listdir(chipseq_dir) if re.match(r'.*\.csv', f)]
    for filename in files:
        ## define original source
        src = os.path.join(chipseq_dir, filename)
        ## define path of destination
        dest = os.path.abspath(os.path.join('data', filename))
        print(filename, src, dest)
        ## copy source to destination
        shutil.copyfile(src, dest)
        ## derive the experiment label: drop the extension and any other dots
        # str.replace() is literal, so the old replace('\.', '') looked for a
        # backslash followed by a dot and never removed anything.
        modfilename = filename.replace('.csv', '').replace('.', '')
        # NOTE(review): create_chipseq() runs once per file; if it recreates
        # the table, rows loaded for earlier files would be lost -- verify.
        create_chipseq()
        ## delete the first line
        delete_first_line(dest)
        parse_comma(dest)
        parse_exp(dest, modfilename)
        load_table('chipseq', dest)
def load_cluster():
    """Create the cluster table and load ``../data/cluster.txt`` into it.

    The file path is resolved relative to the current working directory and
    loaded into the ``cluster`` table with ``load_table``.
    """
    ref_dir = os.path.abspath(os.path.join(os.pardir, 'data'))
    cluster = os.path.join(ref_dir, 'cluster.txt')
    # The original statement was the bare name `create_cluster`, which only
    # evaluates the function object and never runs it.
    # NOTE(review): assumes create_cluster takes no arguments -- confirm.
    create_cluster()
    load_table('cluster', cluster)