def load(species, chipseq, paired, project_name, rerun_number, pi, investigator, tumor_type, pipeline, revision_number, project_dir, conn, log_file):
    """Validate and load all stats files for a project into the database.

    Selects the stat set by species (and ChIP-seq flag), verifies the
    expected files exist, loads the sample title file, then loads each
    stat file in turn. Unrecognized species abort via log_helper.
    """
    stats = list()
    species_key = species.lower()
    if species_key in ['human','hg18','hg19','hybrid','b37','grch37','xenograft']:
        table = load_exome_stats.human_chipseq_stats if chipseq else load_exome_stats.human_stats
        stats = table.values()
    elif species_key in ['mouse','mm9','mm10']:
        table = load_exome_stats.mouse_chipseq_stats if chipseq else load_exome_stats.mouse_stats
        stats = table.values()
    else:
        log_helper.report_error_and_exit(log_file, "species is not recognized: '%s'" % (species))
    ## Paired-end runs carry one additional PE-dependent stat.
    if paired:
        stats.extend(load_exome_stats.paired_end_stats.values())
    # Fail fast if any expected file is missing before touching the DB.
    validate_files(project_name, project_dir, stats, log_file)
    log_file.write("LOG: Loading title file\n")
    load_exome_samples.load(project_name, rerun_number, pi, investigator, 'unknown', pipeline, revision_number, get_title_file(project_name, project_dir), conn, log_file)
    for stat in stats:
        stat_path = get_stat_file(project_name, project_dir, stat)
        log_file.write("LOG: Loading stat '%s' from '%s'\n" % (stat.name, stat_path))
        load_exome_stats.load(species, chipseq, paired, stat.name, project_name, pi, investigator, rerun_number, revision_number, stat_path, conn, log_file)
def load(species, assay, chipseq, paired, project_name, rerun_number, pi, investigator, tumor_type, pipeline, revision_number, project_dir, conn, log_file):
    """Validate and load all stats files for a project into the database.

    Chooses the stat set by species, assay, and ChIP-seq flag (for human,
    a WGS assay overrides the ChIP-seq selection), verifies the expected
    files exist, loads the title file, then loads each stat file.
    Unrecognized species abort via log_helper.
    """
    stats = list()
    species_key = species.lower()
    if species_key in ['human','hg18','hg19','hybrid','b37','grch37','xenograft']:
        # Selection precedence (matching assignment order): WGS > ChIP-seq > default.
        if 'wgs' in assay.lower():
            stats = load_exome_stats.human_wgs_stats.values()
        elif chipseq:
            stats = load_exome_stats.human_chipseq_stats.values()
        else:
            stats = load_exome_stats.human_stats.values()
    elif species_key in ['mouse','mm9','mm10']:
        table = load_exome_stats.mouse_chipseq_stats if chipseq else load_exome_stats.mouse_stats
        stats = table.values()
    else:
        log_helper.report_error_and_exit(log_file, "species is not recognized: '%s'" % (species))
    ## Paired-end runs carry one additional PE-dependent stat.
    if paired:
        stats.extend(load_exome_stats.paired_end_stats.values())
    # Fail fast if any expected file is missing before touching the DB.
    validate_files(project_name, project_dir, stats, log_file)
    log_file.write("LOG: Loading title file\n")
    load_exome_samples.load(project_name, rerun_number, pi, investigator, 'unknown', pipeline, revision_number, get_title_file(project_name, project_dir), conn, log_file)
    for stat in stats:
        stat_path = get_stat_file(project_name, project_dir, stat)
        log_file.write("LOG: Loading stat '%s' from '%s'\n" % (stat.name, stat_path))
        load_exome_stats.load(species, assay, chipseq, paired, stat.name, project_name, pi, investigator, rerun_number, revision_number, stat_path, conn, log_file)
def validate_files(project_name, project_dir, stats, log_file):
    """Verify that the project directory, sample title file, and every
    expected stats file exist; abort via log_helper on the first miss.

    Args:
        project_name: project identifier used to build expected file names.
        project_dir: root directory of the project on disk.
        stats: iterable of stat descriptors (each with .name and
            .file_suffixes) whose files must be present.
        log_file: open file handle used for error reporting.
    """
    if not os.path.exists(project_dir):
        log_helper.report_error_and_exit(
            log_file, "Project directory '%s' does not exist" % (project_dir))
    title_file = get_title_file(project_name, project_dir)
    if not os.path.exists(title_file):
        log_helper.report_error_and_exit(
            log_file, "Sample title file '%s' does not exist" % (title_file))
    for stat in stats:
        # get_stat_file returns a falsy value when the file is absent.
        stat_file = get_stat_file(project_name, project_dir, stat)
        if not stat_file:
            log_helper.report_error_and_exit(
                log_file, "%s '%s[%s]' does not exist"
                % (stat.name,
                   os.path.join(project_dir, "metrics/Proj_" + project_name),
                   "|".join(stat.file_suffixes)))
    ## check for MarkDuplicates, CutAdapt file separately - TEMPORARILY
    ## TO DO: add these to load_stats.stats once database is updated
    # BUG FIX: MarkDuplicates / CutAdaptStats were bare undefined names in the
    # error messages, raising NameError instead of reporting the missing file;
    # they are now string literals.
    for label, suffix in (("MarkDuplicates", "_markDuplicatesMetrics.txt"),
                          ("CutAdaptStats", "_CutAdaptStats.txt")):
        base = "Proj_%s%s" % (project_name, suffix)
        # The metrics file may live under results/metrics/ or metrics/.
        if not os.path.exists(os.path.join(project_dir, "results/metrics", base)):
            if not os.path.exists(os.path.join(project_dir, "metrics", base)):
                log_helper.report_error_and_exit(
                    log_file, "%s '%s[%s]' does not exist"
                    % (label,
                       os.path.join(project_dir, "metrics/Proj_" + project_name),
                       suffix))
def validate_files(project_name, project_dir, stats, log_file):
    """Verify that the project directory, sample title file, every expected
    stats file, and the MarkDuplicates/CutAdapt metrics files exist; abort
    via log_helper on the first missing one.

    Args:
        project_name: project identifier used to build expected file names.
        project_dir: root directory of the project on disk.
        stats: iterable of stat descriptors (each with .name and
            .file_suffixes) whose files must be present.
        log_file: open file handle used for error reporting.
    """
    if not os.path.exists(project_dir):
        log_helper.report_error_and_exit(log_file, "Project directory '%s' does not exist" % (project_dir))
    title_file = get_title_file(project_name, project_dir)
    if not os.path.exists(title_file):
        log_helper.report_error_and_exit(log_file, "Sample title file '%s' does not exist" % (title_file))
    for stat in stats:
        # get_stat_file returns a falsy value when the file is absent.
        stat_file = get_stat_file(project_name, project_dir, stat)
        if not stat_file:
            log_helper.report_error_and_exit(log_file, "%s '%s[%s]' does not exist" % (stat.name, os.path.join(project_dir, "metrics/Proj_" + project_name), "|".join(stat.file_suffixes)))
    ## check for MarkDuplicates, CutAdapt file separately - TEMPORARILY
    ## TO DO: add these to load_stats.stats once database is updated
    # BUG FIX: "MarkDuplicates" / "CutAdaptStats" were bare undefined names in
    # the error messages, raising NameError instead of reporting the missing
    # file; they are now string literals.
    if not os.path.exists(os.path.join(project_dir, "results/metrics/Proj_%s_markDuplicatesMetrics.txt" % (project_name))):
        if not os.path.exists(os.path.join(project_dir, "metrics/Proj_%s_markDuplicatesMetrics.txt" % (project_name))):
            log_helper.report_error_and_exit(log_file, "%s '%s[%s]' does not exist" % ("MarkDuplicates", os.path.join(project_dir, "metrics/Proj_" + project_name), "_markDuplicatesMetrics.txt"))
    if not os.path.exists(os.path.join(project_dir, "results/metrics/Proj_%s_CutAdaptStats.txt" % (project_name))):
        if not os.path.exists(os.path.join(project_dir, "metrics/Proj_%s_CutAdaptStats.txt" % (project_name))):
            log_helper.report_error_and_exit(log_file, "%s '%s[%s]' does not exist" % ("CutAdaptStats", os.path.join(project_dir, "metrics/Proj_" + project_name), "_CutAdaptStats.txt"))
def validate_files(project_name, project_dir, stats, log_file):
    """Confirm the project directory, title file, and all stat files exist.

    Any missing path is reported through log_helper.report_error_and_exit,
    which terminates the run.
    """
    if not os.path.exists(project_dir):
        log_helper.report_error_and_exit(
            log_file, "Project directory '%s' does not exist" % (project_dir))
    title_path = get_title_file(project_name, project_dir)
    if not os.path.exists(title_path):
        log_helper.report_error_and_exit(
            log_file, "Sample title file '%s' does not exist" % (title_path))
    metrics_prefix = os.path.join(project_dir, "metrics/Proj_" + project_name)
    for entry in stats:
        # get_stat_file returns a falsy value when the file is absent.
        if not get_stat_file(project_name, project_dir, entry):
            log_helper.report_error_and_exit(
                log_file, "%s '%s[%s]' does not exist"
                % (entry.name, metrics_prefix, "|".join(entry.file_suffixes)))
# Script-level driver: parse the request file, then load the project's
# stats into the database inside a single transaction.
# NOTE(review): `args`, `log_file_name`, and `chipseq` are not defined in
# the visible code — presumably set earlier in the enclosing script;
# confirm before refactoring.
paired = False
stat_type, request_file, revision_number, in_file_name = args
if not os.path.exists(request_file):
    print "ERROR: Can't find request file %s" %request_file
    sys.exit(-1)
# Pull project metadata out of the request file.
project_id, rerun_number, pi, investigator, species, tumor_type, pipeline = parse_request(request_file)
if not project_id or not rerun_number or not pi or not investigator:
    print "ERROR: Required info missing from request file"
    sys.exit(-1)
conn = None
with open(log_file_name, "w") as log_file:
    try:
        conn = mysql.connector.connect(**db_config.params)
        in_file_name = os.path.abspath(in_file_name)
        # NOTE(review): this argument list matches neither visible `load`
        # signature (13 vs 14 params, different order) — verify against the
        # intended definition.
        load(species, chipseq, paired, stat_type, project_id, pi, investigator, rerun_number, revision_number, in_file_name, conn, log_file)
        # Commit only after every stat loaded without error.
        conn.commit()
        conn.close()
    except Exception as e:
        # On any failure: close the connection (discarding the uncommitted
        # transaction) and report through the log helper.
        if conn:
            conn.close()
        #raise
        log_helper.report_error_and_exit(log_file, str(e))