def db_connect(config=config.DATABASE_CONF):
    """Open a database connection described by a configuration mapping.

    Args:
        config: Mapping with the required keys ``"db"``, ``"user"`` and
            ``"pw"``, and the optional keys ``"host"``, ``"port"`` and
            ``"sslmode"``. Defaults to ``config.DATABASE_CONF``.

    Returns:
        Whatever ``psycopg.connect`` returns for the assembled
        connection string.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Optional settings are emitted only when present, always in this order.
    connstr = []
    for key in ("host", "port", "sslmode"):
        # `in` replaces dict.has_key(), which no longer exists in Python 3.
        if key in config:
            connstr.append("%s=%s" % (key, config[key]))
    connstr.append("dbname=%s user=%s password=%s"
                   % (config["db"], config["user"], config["pw"]))
    return psycopg.connect(" ".join(connstr))
def db_connect(config=config.DATABASE_CONF):
    """Open a database connection described by a configuration mapping.

    Args:
        config: Mapping with the required keys ``'db'``, ``'user'`` and
            ``'pw'``, and the optional keys ``'host'``, ``'port'`` and
            ``'sslmode'``. Defaults to ``config.DATABASE_CONF``.

    Returns:
        Whatever ``psycopg.connect`` returns for the assembled
        connection string.

    Raises:
        KeyError: If one of the required keys is missing.
    """
    # Optional settings are emitted only when present, always in this order.
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    connstr = ["%s=%s" % (key, config[key])
               for key in ('host', 'port', 'sslmode')
               if key in config]
    connstr.append("dbname=%s user=%s password=%s"
                   % (config['db'], config['user'], config['pw']))
    return psycopg.connect(' '.join(connstr))
def __init__(self, config, name):
    """Initialise the repository and register its single packages feed.

    Args:
        config: Repository configuration mapping. An optional
            ``"extensions"`` entry overrides ``self.valid_extensions``.
        name: Repository name, passed on to the base class.

    Raises:
        Exception: If ``self.repo_path()`` is not an existing directory.
    """
    repolib.BuildRepository.__init__(self, config, name)
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if "extensions" in config:
        self.valid_extensions = config["extensions"]
    repo_dir = self.repo_path()
    if not os.path.isdir(repo_dir):
        raise Exception("Repository directory %s does not exists" % repo_dir)
    self.feeds = (packages_build_repository(self),)
def __init__(self, config, name):
    """Initialise the repository and register its single packages feed.

    Args:
        config: Repository configuration mapping. An optional
            ``"extensions"`` entry overrides ``self.valid_extensions``.
        name: Repository name, passed on to the base class.

    Raises:
        Exception: If ``self.repo_path()`` is not an existing directory.
    """
    repolib.BuildRepository.__init__(self, config, name)
    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if "extensions" in config:
        self.valid_extensions = config['extensions']
    repo_dir = self.repo_path()
    if not os.path.isdir(repo_dir):
        raise Exception("Repository directory %s does not exists" % repo_dir)
    self.feeds = (packages_build_repository(self), )
def __init__(self, config, name):
    """Initialise the repository with one feed per architecture/component.

    Args:
        config: Repository configuration mapping. ``"architectures"`` is
            mandatory; ``'components'`` is read unconditionally and the
            value ``["-"]`` means "no components".
        name: Repository name, passed on to the base class.

    Raises:
        Exception: If no architectures are configured or if
            ``self.repo_path()`` is not an existing directory.
        KeyError: If ``config`` has no ``'components'`` entry.
    """
    repolib.BuildRepository.__init__(self, config, name)

    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if "architectures" not in config:
        raise Exception("Broken configuration : no architecture defined")
    self.architectures = config["architectures"]

    self.components = []
    if config['components'] != ["-"]:
        self.components.extend(config['components'])

    repo_dir = self.repo_path()
    if not os.path.isdir(repo_dir):
        raise Exception("Repository directory %s does not exists" % repo_dir)

    # Architecture-major order, exactly as the nested loops produced it.
    self.feeds = [debian_component_repository(self, arch, compname)
                  for arch in self.architectures
                  for compname in self.components]
def __init__(self, config, name):
    """Initialise the repository with one feed per architecture/component.

    Args:
        config: Repository configuration mapping. ``"architectures"`` is
            mandatory; ``'components'`` is read unconditionally and the
            value ``["-"]`` means "no components".
        name: Repository name, passed on to the base class.

    Raises:
        Exception: If no architectures are configured or if
            ``self.repo_path()`` is not an existing directory.
        KeyError: If ``config`` has no ``'components'`` entry.
    """
    repolib.BuildRepository.__init__(self, config, name)

    # `in` replaces dict.has_key(), which no longer exists in Python 3.
    if "architectures" not in config:
        raise Exception("Broken configuration : no architecture defined")
    self.architectures = config["architectures"]

    self.components = []
    if config['components'] != ["-"]:
        self.components.extend(config['components'])

    repo_dir = self.repo_path()
    if not os.path.isdir(repo_dir):
        raise Exception("Repository directory %s does not exists" % repo_dir)

    # Architecture-major order, exactly as the nested loops produced it.
    self.feeds = [debian_component_repository(self, arch, compname)
                  for arch in self.architectures
                  for compname in self.components]
def stats_fastq(path, samples, config):
    """Build the HTML table summarising the FastQC report of every sample.

    Args:
        path: Project output directory. Reports are read from
            ``<path>/results_fastqc/<fastq name>_fastqc/``.
        samples: Mapping of sample name to a list whose first half holds
            the fastq file paths of the sample. Only the first two files
            contribute to a row; two values are shown as ``a / b``.
        config: Mapping of enabled modules; the table is only built when
            it has a ``"fastqc"`` key.

    Returns:
        ``1`` if ``path`` does not exist, ``""`` if FastQC is not enabled
        or ``path`` has no ``results_fastqc`` entry, otherwise the
        ``<table>`` HTML string.
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    reports = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title, image linked from the cell; "" = no image)
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics", ""),
        ("2%20Per%20Base%20Sequence%20Quality.html",
         "Per base sequence quality", "per_base_quality.png"),
        ("3%20Per%20Sequence%20Quality%20Scores.html",
         "Per sequence quality scores", "per_sequence_quality.png"),
        ("4%20Per%20Base%20Sequence%20Content.html",
         "Per base sequence content", "per_base_sequence_content.png"),
        ("5%20Per%20Sequence%20GC%20Content.html",
         "Per base GC content", "per_base_gc_content.png"),
        ("5%20Per%20Sequence%20GC%20Content.html",
         "Per sequence GC content", "per_sequence_gc_content.png"),
        ("6%20Per%20Base%20N%20Content.html",
         "Per base N content", "per_base_n_content.png"),
        ("7%20Sequence%20Length%20Distribution.html",
         "Sequence Length Distribution", "sequence_length_distribution.png"),
        ("8%20Duplicate%20Sequences.html",
         "Sequence Duplication Levels", "duplication_levels.png"),
        ("9%20Overrepresented%20Sequences.html",
         "Overrepresented sequences", ""),
        ("11%20Kmer%20Content.html", "Kmer Content", "kmer_profiles.png"),
    ]
    headers = ["Sample", "Link"]
    headers.extend('<a href="' + help_url + page + '" target="_blank">'
                   + title + '</a>' for page, title, _ in modules)
    headers.extend(["Total sequences", "Sequence length", "%GC"])
    # images[k] belongs to data column k of a read; column 0 is the link.
    images = [""] + [image for _, _, image in modules] + ["", "", ""]

    table = ["<tr>" + "".join("<th bgcolor='#A8A8A8'>" + header + "</th>"
                              for header in headers) + "</tr>"]

    image_index = 0  # running id that gives each lightbox link a unique name
    for sample, data in sorted(samples.items()):
        # Every read is a list of (raw value, cell html) pairs. Keeping the
        # raw value lets the colour be chosen without looking at the markup.
        reads = []
        for fastq in data[0:len(data) // 2]:
            stem = (fastq.split("/")[-1].replace(".fastq", "")
                    .replace(".gz", "") + "_fastqc")
            if stem not in reports:
                reads.append([("NA", "NA")] * len(images))
                continue

            report_dir = results_dir + "/" + stem
            cells = [("", '<a href="../results_fastqc/' + stem
                      + '/fastqc_report.html" target="_blank">Results</a>')]

            # summary.txt: one module per line, the first column is the flag.
            with open(report_dir + "/summary.txt", 'r') as summary:
                for line in summary:
                    flag = line.strip("\n").split("\t")[0]
                    image = images[len(cells)]
                    if image:
                        html = ('<a href="../results_fastqc/' + stem
                                + '/Images/' + image
                                + '" class="lytebox lytetip" data-tip="" '
                                'data-lyte-options="showPrint:true '
                                'tipStyle:classic" data-lightbox="image-'
                                + str(image_index) + '" data-title="'
                                + sample + "_" + str(len(reads) + 1)
                                + '">' + flag + '</a>')
                        image_index += 1
                    else:
                        html = flag
                    cells.append((flag, html))

            # fastqc_data.txt: the 7th, 9th and 10th lines feed the last
            # three columns (total sequences, sequence length, %GC).
            with open(report_dir + "/fastqc_data.txt", 'r') as stats:
                for line_no, line in enumerate(stats):
                    if line_no in (6, 8, 9):
                        value = line.strip("\n").split("\t")[1]
                        cells.append((value, value))
                    elif line_no > 9:
                        break
            reads.append(cells)

        if not reads:
            # A sample without fastq files still gets a (fully NA) row
            # instead of aborting the whole report.
            reads.append([("NA", "NA")] * len(images))

        row = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for column, (raw, html) in enumerate(reads[0]):
            if len(reads) > 1:
                raw = raw + " / " + reads[1][column][0]
                html = html + " / " + reads[1][column][1]
            # Decide on the raw values only: sample or file names such as
            # "RNA1" inside the markup must not be mistaken for "NA".
            if ("NA" in raw) or ("FAIL" in raw):
                colour = "#CC3300"
            elif "WARN" in raw:
                colour = "#FFCC00"
            else:
                colour = "#00CC66"
            row.append("<td bgcolor='" + colour + "'>" + html + "</td>")
        table.append("<tr>" + "".join(row) + "</tr>")

    return "<table>" + "\n".join(table) + "</table>"
def project_process(path_base, folder):
    """Print the status of a project's main job and of every enabled module.

    Reads ``pid.txt``, ``temp/pids.txt`` and ``config.txt`` below
    ``<path_base>/<folder>`` and writes a textual report to stdout.

    Args:
        path_base: Directory that contains the project folder.
        folder: Name of the project folder.

    Returns:
        None.
    """
    def status_label(code):
        # Mapping applied to manager.job_status() results:
        # 1 -> "DONE", 0 -> "RUN", anything else -> "ERROR".
        if code == 1:
            return "DONE"
        if code == 0:
            return "RUN"
        return "ERROR"

    project_dir = path_base + "/" + folder
    modules = ["trimgalore", "fastqc", "star", "htseq-gene", "htseq-exon",
               "sam2sortbam"]
    samples = get_samples(path_base, folder, project_dir + "/samples.list")

    # Check main process
    print("## MAIN PROCESS ###########################")
    try:
        with open(project_dir + "/pid.txt", 'r') as handle:
            job_id = handle.readline().strip("\n").split("\t")[1]
        status = status_label(manager.job_status(job_id))
        print("- Main process (" + job_id + ") status: " + status)
    except Exception:
        # Best effort: a missing or malformed pid file is not an error here.
        print("- Main process not found or already finished")

    # Check subprocesses
    print("## SUBPROCESSES ###########################")
    pids = dict()
    try:
        with open(project_dir + "/temp/pids.txt", 'r') as handle:
            for line in handle:
                fields = line.strip("\n").split("\t")
                pids[fields[0]] = [fields[1].split("|"),
                                   fields[2].split("|")]
    except Exception:
        print("- No subprocesses file found")

    # Enabled modules: "<module>\t<value>[/...]" lines whose value is not "0".
    config = dict()
    with open(project_dir + "/config.txt", 'r') as handle:
        for line in handle:
            if line.startswith("%"):
                continue
            fields = line.strip("\n").split("\t")
            if fields[0] in modules:
                value = fields[1].split("/")[0]
                if value != "0":
                    config[fields[0]] = value

    for pg in modules:
        if pg not in config:
            continue
        if pg not in pids:
            print("- Already done or waiting for previous module output")
        else:
            job_names, job_ids = pids[pg]
            print("- ID: " + "|".join(job_names))
            print("- Status: " + "|".join(
                status_label(manager.job_status(job)) for job in job_ids))
        # NOTE(review): the progress report is emitted for every enabled
        # module, whether or not it has an entry in pids.txt - confirm this
        # nesting against the original layout.
        _, stats = check_samples(samples, path_base, folder, pg, "update")
        total = float(stats[0])
        if not total:
            # Avoid a ZeroDivisionError when there is nothing to count.
            print("- Progress: no samples to report")
            continue
        sok = str(round(100 * float(stats[1]) / total, 2))
        sko = str(round(100 * float(stats[2]) / total, 2))
        pending = str(round(
            100 * float(stats[0] - stats[1] - stats[2]) / total, 2))
        print("- Progress: " + sok + "% succeeded / " + sko
              + "% exited / " + pending + "% pending")
def stats_fastq(path, samples, config):
    """Build the HTML table summarising the FastQC report of every sample.

    Args:
        path: Project output directory. Reports are read from
            ``<path>/results_fastqc/<fastq name>_fastqc/``.
        samples: Mapping of sample name to a list whose first half holds
            the fastq file paths of the sample. Only the first two files
            contribute to a row; two values are shown as ``a / b``.
        config: Mapping of enabled modules; the table is only built when
            it has a ``"fastqc"`` key.

    Returns:
        ``1`` if ``path`` does not exist, ``""`` if FastQC is not enabled
        or ``path`` has no ``results_fastqc`` entry, otherwise the
        ``<table>`` HTML string.
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    reports = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title, image linked from the cell; "" = no image)
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics", ""),
        ("2%20Per%20Base%20Sequence%20Quality.html",
         "Per base sequence quality", "per_base_quality.png"),
        ("3%20Per%20Sequence%20Quality%20Scores.html",
         "Per sequence quality scores", "per_sequence_quality.png"),
        ("4%20Per%20Base%20Sequence%20Content.html",
         "Per base sequence content", "per_base_sequence_content.png"),
        ("5%20Per%20Sequence%20GC%20Content.html",
         "Per base GC content", "per_base_gc_content.png"),
        ("5%20Per%20Sequence%20GC%20Content.html",
         "Per sequence GC content", "per_sequence_gc_content.png"),
        ("6%20Per%20Base%20N%20Content.html",
         "Per base N content", "per_base_n_content.png"),
        ("7%20Sequence%20Length%20Distribution.html",
         "Sequence Length Distribution", "sequence_length_distribution.png"),
        ("8%20Duplicate%20Sequences.html",
         "Sequence Duplication Levels", "duplication_levels.png"),
        ("9%20Overrepresented%20Sequences.html",
         "Overrepresented sequences", ""),
        ("11%20Kmer%20Content.html", "Kmer Content", "kmer_profiles.png"),
    ]
    headers = ["Sample", "Link"]
    headers.extend('<a href="' + help_url + page + '" target="_blank">'
                   + title + '</a>' for page, title, _ in modules)
    headers.extend(["Total sequences", "Sequence length", "%GC"])
    # images[k] belongs to data column k of a read; column 0 is the link.
    images = [""] + [image for _, _, image in modules] + ["", "", ""]

    table = ["<tr>" + "".join("<th bgcolor='#A8A8A8'>" + header + "</th>"
                              for header in headers) + "</tr>"]

    image_index = 0  # running id that gives each lightbox link a unique name
    for sample, data in sorted(samples.items()):
        # Every read is a list of (raw value, cell html) pairs. Keeping the
        # raw value lets the colour be chosen without looking at the markup.
        reads = []
        for fastq in data[0:len(data) // 2]:
            stem = (fastq.split("/")[-1].replace(".fastq", "")
                    .replace(".gz", "") + "_fastqc")
            if stem not in reports:
                reads.append([("NA", "NA")] * len(images))
                continue

            report_dir = results_dir + "/" + stem
            cells = [("", '<a href="../results_fastqc/' + stem
                      + '/fastqc_report.html" target="_blank">Results</a>')]

            # summary.txt: one module per line, the first column is the flag.
            with open(report_dir + "/summary.txt", 'r') as summary:
                for line in summary:
                    flag = line.strip("\n").split("\t")[0]
                    image = images[len(cells)]
                    if image:
                        html = ('<a href="../results_fastqc/' + stem
                                + '/Images/' + image
                                + '" class="lytebox lytetip" data-tip="" '
                                'data-lyte-options="showPrint:true '
                                'tipStyle:classic" data-lightbox="image-'
                                + str(image_index) + '" data-title="'
                                + sample + "_" + str(len(reads) + 1)
                                + '">' + flag + '</a>')
                        image_index += 1
                    else:
                        html = flag
                    cells.append((flag, html))

            # fastqc_data.txt: the 7th, 9th and 10th lines feed the last
            # three columns (total sequences, sequence length, %GC).
            with open(report_dir + "/fastqc_data.txt", 'r') as stats:
                for line_no, line in enumerate(stats):
                    if line_no in (6, 8, 9):
                        value = line.strip("\n").split("\t")[1]
                        cells.append((value, value))
                    elif line_no > 9:
                        break
            reads.append(cells)

        if not reads:
            # A sample without fastq files still gets a (fully NA) row
            # instead of aborting the whole report.
            reads.append([("NA", "NA")] * len(images))

        row = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for column, (raw, html) in enumerate(reads[0]):
            if len(reads) > 1:
                raw = raw + " / " + reads[1][column][0]
                html = html + " / " + reads[1][column][1]
            # Decide on the raw values only: sample or file names such as
            # "RNA1" inside the markup must not be mistaken for "NA".
            if ("NA" in raw) or ("FAIL" in raw):
                colour = "#CC3300"
            elif "WARN" in raw:
                colour = "#FFCC00"
            else:
                colour = "#00CC66"
            row.append("<td bgcolor='" + colour + "'>" + html + "</td>")
        table.append("<tr>" + "".join(row) + "</tr>")

    return "<table>" + "\n".join(table) + "</table>"
def stats_fastq(path, samples, config):
    """Build the HTML table summarising the FastQC report of every sample.

    Args:
        path: Project output directory. Reports are read from
            ``<path>/results_fastqc/<fastq name>_fastqc/``.
        samples: Mapping of sample name to a list whose first half holds
            the fastq file paths of the sample. Only the first two files
            contribute to a row; two values are shown as ``a / b``.
        config: Mapping of enabled modules. The table is only built when
            it has a ``"fastqc"`` key; with a ``"bowtie2"`` key the
            ``_noRNA`` variants of the fastq names are looked up instead.

    Returns:
        ``1`` if ``path`` does not exist, ``""`` if FastQC is not enabled
        or ``path`` has no ``results_fastqc`` entry, otherwise the
        ``<table>`` HTML string.
    """
    if not os.path.exists(path):
        return 1
    entries = os.listdir(path)
    if "fastqc" not in config or "results_fastqc" not in entries:
        return ""

    results_dir = path + "/results_fastqc"
    # reports: names of everything inside the results_fastqc directory
    reports = os.listdir(results_dir)

    help_url = ("http://www.bioinformatics.babraham.ac.uk/projects/fastqc/"
                "Help/3%20Analysis%20Modules/")
    # (help page, column title, image linked from the cell; "" = no image).
    # Keeping title and image side by side guarantees that every column
    # links to its own picture.
    modules = [
        ("1%20Basic%20Statistics.html", "Basic statistics", ""),
        ("2%20Per%20Base%20Sequence%20Quality.html",
         "Per base sequence quality", "per_base_quality.png"),
        ("3%20Per%20Sequence%20Quality%20Scores.html",
         "Per sequence quality scores", "per_sequence_quality.png"),
        ("4%20Per%20Base%20Sequence%20Content.html",
         "Per base sequence content", "per_base_sequence_content.png"),
        ("5%20Per%20Sequence%20GC%20Content.html",
         "Per sequence GC content", "per_sequence_gc_content.png"),
        ("6%20Per%20Base%20N%20Content.html",
         "Per base N content", "per_base_n_content.png"),
        ("7%20Sequence%20Length%20Distribution.html",
         "Sequence Length Distribution", "sequence_length_distribution.png"),
        ("8%20Duplicate%20Sequences.html",
         "Sequence Duplication Levels", "duplication_levels.png"),
        ("9%20Overrepresented%20Sequences.html",
         "Overrepresented sequences", ""),
        ("11%20Kmer%20Content.html", "Kmer Content", "kmer_profiles.png"),
    ]
    headers = ["Sample", "Link"]
    headers.extend('<a href="' + help_url + page + '" target="_blank">'
                   + title + '</a>' for page, title, _ in modules)
    headers.extend(["Total sequences", "Sequence length", "%GC"])
    # images[k] belongs to data column k of a read; column 0 is the link.
    images = [""] + [image for _, _, image in modules] + ["", "", ""]
    # Placeholder for a read without a report: one NA per data column.
    missing_read = [("NA", "NA")] * (len(headers) - 1)

    # Write out the table headers
    table = ["<tr>" + "".join("<th bgcolor='#A8A8A8'>" + header + "</th>"
                              for header in headers) + "</tr>"]

    # Write out the data in rows
    image_index = 0  # running id that gives each lightbox link a unique name
    for sample, data in sorted(samples.items()):
        # Every read is a list of (raw value, cell html) pairs. Keeping the
        # raw value lets the colour be chosen without looking at the markup.
        data_for_sample = []
        for fastq in data[0:len(data) // 2]:
            # Name of the fastq file without the directory path
            fastq = fastq.split("/")[-1]
            if "bowtie2" in config:
                fastq = fastq.replace("_R1_001", "_noRNA_R1_001")
                fastq = fastq.replace("_R2_001", "_noRNA_R2_001")
            stem = fastq.replace(".fastq", "").replace(".gz", "") + "_fastqc"
            if stem not in reports:
                data_for_sample.append(list(missing_read))
                continue

            report_dir = results_dir + "/" + stem
            # First data column: link to the full FastQC report
            cells = [("", '<a href="../results_fastqc/' + stem
                      + '/fastqc_report.html" target="_blank">Results</a>')]

            # summary.txt has one "<flag>\t<module>\t<file>" line per module.
            with open(report_dir + "/summary.txt", 'r') as summary_file:
                for line in summary_file:
                    # These two modules have no column in the table.
                    if "Adapter Content" in line:
                        continue
                    if "Per tile sequence quality" in line:
                        continue
                    flag = line.strip("\n").split("\t")[0]
                    image = images[len(cells)]
                    if image:
                        html = ('<a href="../results_fastqc/' + stem
                                + '/Images/' + image
                                + '" class="lytebox lytetip" data-tip="" '
                                'data-lyte-options="showPrint:true '
                                'tipStyle:classic" data-lightbox="image-'
                                + str(image_index) + '" data-title="'
                                + sample + "_" + str(len(data_for_sample) + 1)
                                + '">' + flag + '</a>')
                        image_index += 1
                    else:
                        html = flag
                    cells.append((flag, html))
            # There may not be a Kmer line in the summary file. If not,
            # fill its column with "NA" so the later columns stay aligned.
            if len(cells) == 10:
                cells.append(("NA", "NA"))

            # fastqc_data.txt: the 7th, 9th and 10th lines feed the last
            # three columns (total sequences, sequence length, %GC).
            with open(report_dir + "/fastqc_data.txt", 'r') as data_file:
                for line_no, line in enumerate(data_file):
                    if line_no in (6, 8, 9):
                        value = line.strip("\n").split("\t")[1]
                        cells.append((value, value))
                    elif line_no > 9:
                        break
            data_for_sample.append(cells)

        if not data_for_sample:
            # A sample without fastq files still gets a (fully NA) row
            # instead of aborting the whole report.
            data_for_sample.append(list(missing_read))

        row = ["<td bgcolor='#A8A8A8'>" + sample + "</td>"]
        for col_index, (raw, html) in enumerate(data_for_sample[0]):
            if len(data_for_sample) > 1:
                raw = raw + " / " + data_for_sample[1][col_index][0]
                html = html + " / " + data_for_sample[1][col_index][1]
            # Decide on the raw values only, never on the markup around them.
            if "FAIL" in raw:
                colour = "#CC3300"
            elif "WARN" in raw:
                colour = "#FFCC00"
            elif ("NA /" in raw) or ("/ NA" in raw):
                colour = "#A8A8A8"
            else:
                colour = "#00CC66"
            row.append("<td bgcolor='" + colour + "'>" + html + "</td>")
        table.append("<tr>" + "".join(row) + "</tr>")

    return "<table>" + "\n".join(table) + "</table>"
def __init__(self, filename):
    """Load a translation file: its config header and its key/value pairs.

    The file is read as UTF-8. The first ``{...}`` section is parsed as
    JSON and its ``name``, ``name_translated``, ``language_code``,
    ``created_by`` and ``created_date`` entries are passed to the
    corresponding setter methods. The remaining text is read as
    alternating key and value lines and stored in ``self.translation``.

    Args:
        filename: Path of the translation file.

    Raises:
        TranslationLoadError: If the file cannot be read, contains no
            ``{...}`` section, or the section lacks a required field.
    """
    debug(u"Begin loading translation from file: %s" % filename)

    # Language files should be saved as UTF-8, so they are decoded while
    # being read.
    try:
        with codecs.open(filename, "r", "UTF-8") as f:
            block = f.read()
    except IOError:
        debug(u"Problem loading information from file, aborting load of translation file")
        raise TranslationLoadError()

    # Convert newlines to unix style.
    # NOTE(review): "u_newlines" is not a standard-library codec; it is
    # presumably registered elsewhere in the project - confirm.
    block = block.decode("u_newlines")

    # Scan document for block between {}, this is our config section
    dicts = re.findall("(?={).+?(?<=})", block, re.DOTALL)
    if not dicts:
        debug(u"No config section found in file: \"%s\", aborting load of translation" % filename)
        raise TranslationLoadError()
    if len(dicts) > 1:
        debug(u"Found more than one dict-like structure (e.g. pair of \"{}\") in file: \"%s\" - assuming config is the first one" % filename)
    configstring = dicts[0]
    debug(u"Translation file config string is: %s" % configstring)
    config = json.loads(configstring)

    # Every config field is mandatory and is handed to its setter.
    setters = [
        ("name", self.name),
        ("name_translated", self.longname),
        ("language_code", self.language_code),
        ("created_by", self.created_by),
        ("created_date", self.created_date),
    ]
    for ci, func in setters:
        if ci not in config:
            # Translation file invalid, error out of read process
            debug(u"Error loading translation from %s, %s field not found, aborting load of translation" % (filename, ci))
            raise TranslationLoadError()
        func(config[ci])

    # First pass: drop comment lines (those starting with "#"). Blank
    # lines are kept because a blank line can be an empty value.
    content_lines = [line for line in block.split("\n")
                     if not line.startswith("#")]

    # Second pass: the translation is made up of key/value line pairs.
    # A key can never be blank, so blank lines met while looking for a
    # key are discarded; the line after a key is always taken as its
    # value, blank or not. Escaped "\n" sequences become real newlines.
    entries = []
    looking_for_key = True
    for line in content_lines:
        if looking_for_key and not line:
            continue
        entries.append(line.replace("\\n", "\n"))
        looking_for_key = not looking_for_key

    # Pair the entries up; a later duplicate key overrides an earlier one.
    translation = {}
    for i in range(0, len(entries) - 1, 2):
        translation[entries[i]] = entries[i + 1]
    if len(entries) % 2:
        # A trailing key without a value is reported and left out.
        debug(u"Translation key without a value at end of file %s: %s" % (filename, entries[-1]))

    self.translation = translation
def project_process(path_base, folder):
    """Print the status of a project's main job and of every enabled module.

    Reads ``pid.txt``, ``temp/pids.txt`` and ``config.txt`` below
    ``<path_base>/<folder>`` and writes a textual report to stdout.

    Args:
        path_base: Directory that contains the project folder.
        folder: Name of the project folder.

    Returns:
        None.
    """
    def status_label(code):
        # Mapping applied to manager.job_status() results:
        # 1 -> "DONE", 0 -> "RUN", anything else -> "ERROR".
        if code == 1:
            return "DONE"
        if code == 0:
            return "RUN"
        return "ERROR"

    project_dir = path_base + "/" + folder
    # Modules that can be switched on in config.txt ...
    configurable = ["trimgalore", "fastqc", "kallisto", "star", "star-fusion",
                    "picard", "htseq-gene", "htseq-exon", "varscan", "gatk"]
    # ... and the order in which they are reported.
    report_order = ["trimgalore", "fastqc", "kallisto", "star", "star-fusion",
                    "picard", "htseq-gene", "htseq-exon", "sam2sortbam",
                    "varscan", "gatk"]
    samples = get_samples(path_base, folder, project_dir + "/samples.list")

    # Check main process
    print("## MAIN PROCESS ###########################")
    try:
        with open(project_dir + "/pid.txt", 'r') as handle:
            job_id = handle.readline().strip("\n").split("\t")[1]
        status = status_label(manager.job_status(job_id))
        print("- Main process (" + job_id + ") status: " + status)
    except Exception:
        # Best effort: a missing or malformed pid file is not an error here.
        print("- Main process not found or already finished")

    # Check subprocesses
    print("## SUBPROCESSES ###########################")
    pids = dict()
    try:
        with open(project_dir + "/temp/pids.txt", 'r') as handle:
            for line in handle:
                fields = line.strip("\n").split("\t")
                pids[fields[0]] = [fields[1].split("|"),
                                   fields[2].split("|")]
    except Exception:
        print("- No subprocesses file found")

    # Enabled modules: "<module>\t<value>[/...]" lines whose value is not "0".
    config = dict()
    with open(project_dir + "/config.txt", 'r') as handle:
        for line in handle:
            if line.startswith("%"):
                continue
            fields = line.strip("\n").split("\t")
            if fields[0] in configurable:
                value = fields[1].split("/")[0]
                if value != "0":
                    config[fields[0]] = value
    # Enabling either variant caller also enables the sam2sortbam step.
    if "varscan" in config or "gatk" in config:
        config["sam2sortbam"] = 1

    for pg in report_order:
        if pg not in config:
            continue
        print("Process: " + pg)
        if pg not in pids:
            print("- Already done or waiting for previous module output")
        else:
            job_names, job_ids = pids[pg]
            print("- ID: " + "|".join(job_names))
            print("- Status: " + "|".join(
                status_label(manager.job_status(job)) for job in job_ids))
        # NOTE(review): the progress report is emitted for every enabled
        # module, whether or not it has an entry in pids.txt - confirm this
        # nesting against the original layout.
        _, stats = check_samples(samples, path_base, folder, pg, "update")
        total = float(stats[0])
        if not total:
            # Avoid a ZeroDivisionError when there is nothing to count.
            print("- Progress: no samples to report")
            continue
        sok = str(round(100 * float(stats[1]) / total, 2))
        sko = str(round(100 * float(stats[2]) / total, 2))
        pending = str(round(
            100 * float(stats[0] - stats[1] - stats[2]) / total, 2))
        print("- Progress: " + sok + "% succeeded / " + sko
              + "% exited / " + pending + "% pending")