Example no. 1
0

import os
import sys

import mysql.connector

# Project-local helpers referenced throughout this excerpt
import db_config
import load_exome_samples
import load_exome_stats
import log_helper


def load(species, chipseq, paired, project_name, rerun_number, pi, investigator, tumor_type, pipeline, revision_number, project_dir, conn, log_file):
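    """Load a project's QC stats into the database.

    Select the expected stat set for the given species (with a ChIP-seq
    override), optionally add the paired-end-only stat, validate that every
    expected metrics file exists, load the sample title file via
    load_exome_samples.load(), and then load each stat file through
    load_exome_stats.load().
    """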
    stats = list()

    if species.lower() in ['human','hg18','hg19','hybrid','b37','grch37','xenograft']:
        stats = list(load_exome_stats.human_stats.values())
        if chipseq:
            stats = list(load_exome_stats.human_chipseq_stats.values())
    elif species.lower() in ['mouse','mm9','mm10']:
        stats = list(load_exome_stats.mouse_stats.values())
        if chipseq:
            stats = list(load_exome_stats.mouse_chipseq_stats.values())
    else:
        log_helper.report_error_and_exit(log_file, "species is not recognized: '%s'" % (species))

    ## If paired, add the one PE-dependent stat
    if paired:
        stats.extend(load_exome_stats.paired_end_stats.values())

    validate_files(project_name, project_dir, stats, log_file)
    # Now load the data
    log_file.write("LOG: Loading title file\n")
    load_exome_samples.load(project_name, rerun_number, pi, investigator, 'unknown', pipeline, revision_number, get_title_file(project_name, project_dir), conn, log_file)
    for stat in stats:
        file_name = get_stat_file(project_name, project_dir, stat)
        log_file.write("LOG: Loading stat '%s' from '%s'\n" % (stat.name, file_name))
        load_exome_stats.load(species, chipseq, paired, stat.name, project_name, pi, investigator, rerun_number, revision_number, file_name, conn, log_file)
def load(species, assay, chipseq, paired, project_name, rerun_number, pi, investigator, tumor_type, pipeline, revision_number, project_dir, conn, log_file):
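    """Load a project's QC stats into the database (assay-aware variant).

    Same flow as the load() above, with an extra `assay` argument: human
    projects whose assay name contains 'wgs' use the WGS stat set instead of
    the exome/ChIP-seq sets.
    """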
    stats = list()

    if species.lower() in ['human','hg18','hg19','hybrid','b37','grch37','xenograft']:
        stats = list(load_exome_stats.human_stats.values())
        if chipseq:
            stats = list(load_exome_stats.human_chipseq_stats.values())
        if 'wgs' in assay.lower():
            stats = list(load_exome_stats.human_wgs_stats.values())
    elif species.lower() in ['mouse','mm9','mm10']:
        stats = list(load_exome_stats.mouse_stats.values())
        if chipseq:
            stats = list(load_exome_stats.mouse_chipseq_stats.values())
    else:
        log_helper.report_error_and_exit(log_file, "species is not recognized: '%s'" % (species))

    ## If paired, add the one PE-dependent stat
    if paired:
        stats.extend(load_exome_stats.paired_end_stats.values())

    validate_files(project_name, project_dir, stats, log_file)
    # Now load the data
    log_file.write("LOG: Loading title file\n")
    load_exome_samples.load(project_name, rerun_number, pi, investigator, 'unknown', pipeline, revision_number, get_title_file(project_name, project_dir), conn, log_file)
    for stat in stats:
        file_name = get_stat_file(project_name, project_dir, stat)
        log_file.write("LOG: Loading stat '%s' from '%s'\n" % (stat.name, file_name))
        load_exome_stats.load(species, assay, chipseq, paired, stat.name, project_name, pi, investigator, rerun_number, revision_number, file_name, conn, log_file)
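
# Usage sketch for the assay-aware load() above. The literal values are
# illustrative placeholders only (not taken from a real project); conn and
# log_file are set up the same way as in the driver code further down.
#
#     conn = mysql.connector.connect(**db_config.params)
#     with open("Proj_00000_load.log", "w") as log_file:
#         load('human', 'WGS', False, True, '00000', '1', 'pi_name',
#              'investigator_name', 'unknown', 'pipeline_name', '1.0',
#              '/path/to/Proj_00000', conn, log_file)
#         conn.commit()
#     conn.close()
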
def validate_files(project_name, project_dir, stats, log_file):
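    """Exit via log_helper if any expected input file for the project is missing.

    Checks the project directory, the sample title file, every stat file in
    `stats`, and (until they are added to load_stats.stats) the MarkDuplicates
    and CutAdapt metrics files under results/metrics/ or metrics/.
    """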
    if not os.path.exists(project_dir):
        log_helper.report_error_and_exit(
            log_file, "Project directory '%s' does not exist" % (project_dir))

    title_file = get_title_file(project_name, project_dir)
    if not os.path.exists(title_file):
        log_helper.report_error_and_exit(
            log_file, "Sample title file '%s' does not exist" % (title_file))
    for stat in stats:
        stat_file = get_stat_file(project_name, project_dir, stat)
        if not stat_file:  # get_stat_file checks that it exists
            log_helper.report_error_and_exit(
                log_file, "%s '%s[%s]' does not exist" %
                (stat.name,
                 os.path.join(project_dir, "metrics/Proj_" + project_name),
                 "|".join(stat.file_suffixes)))

    ## Check the MarkDuplicates and CutAdapt files separately - TEMPORARY
    ## TODO: add these to load_stats.stats once the database is updated
    if not os.path.exists(
            os.path.join(
                project_dir,
                "results/metrics/Proj_%s_markDuplicatesMetrics.txt" %
                (project_name))):
        if not os.path.exists(
                os.path.join(
                    project_dir, "metrics/Proj_%s_markDuplicatesMetrics.txt" %
                    (project_name))):
            log_helper.report_error_and_exit(
                log_file, "%s '%s[%s]' does not exist" %
                ("MarkDuplicates",
                 os.path.join(project_dir, "metrics/Proj_" + project_name),
                 "_markDuplicatesMetrics.txt"))
    if not os.path.exists(
            os.path.join(
                project_dir, "results/metrics/Proj_%s_CutAdaptStats.txt" %
                (project_name))):
        if not os.path.exists(
                os.path.join(
                    project_dir, "metrics/Proj_%s_CutAdaptStats.txt" %
                    (project_name))):
            log_helper.report_error_and_exit(
                log_file, "%s '%s[%s]' does not exist" %
                ("CutAdaptStats",
                 os.path.join(project_dir, "metrics/Proj_" + project_name),
                 "_CutAdaptStats.txt"))
Example no. 5
0
def validate_files(project_name, project_dir, stats, log_file):
    if not os.path.exists(project_dir):
        log_helper.report_error_and_exit(log_file, "Project directory '%s' does not exist"  % (project_dir))

    title_file = get_title_file(project_name, project_dir)
    if not os.path.exists(title_file):
        log_helper.report_error_and_exit(log_file, "Sample title file '%s' does not exist"  % (title_file))
    for stat in stats:
        stat_file = get_stat_file(project_name, project_dir, stat)
        if not stat_file: # get_stat_file checks that it exists
            log_helper.report_error_and_exit(log_file, "%s '%s[%s]' does not exist"  % (stat.name, os.path.join(project_dir, "metrics/Proj_" + project_name), "|".join(stat.file_suffixes)))
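
# Driver logic (apparently the script's entry point; the option parsing that sets
# chipseq and log_file_name is not part of this excerpt): unpack the command-line
# arguments, read project metadata from the request file, then connect to MySQL
# and load the requested stat file, reporting any failure through log_helper.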
    paired = False

    stat_type, request_file, revision_number, in_file_name = args

    if not os.path.exists(request_file):
        print("ERROR: Can't find request file %s" % request_file)
        sys.exit(-1)

    project_id, rerun_number, pi, investigator, species, tumor_type, pipeline = parse_request(request_file)

    if not project_id or not rerun_number or not pi or not investigator:
        print("ERROR: Required info missing from request file")
        sys.exit(-1)


    conn = None
    with open(log_file_name, "w") as log_file:
        try:
            conn = mysql.connector.connect(**db_config.params)
    
            in_file_name = os.path.abspath(in_file_name)
            load(species, chipseq, paired, stat_type, project_id, pi, investigator, rerun_number, revision_number, in_file_name, conn, log_file)
    
            conn.commit()
            conn.close() 
        except Exception as e:
            if conn:
                conn.close()
            # raise  # uncomment to re-raise and see the full traceback while debugging
            log_helper.report_error_and_exit(log_file, str(e))