Beispiel #1
0
def upload_to_gdocs(fcdir, credentials_file=None, gdocs_folder=None):
    """Upload the metrics of one run folder to Google Docs spreadsheets.

    The metrics collected from ``fcdir`` are written to a flowcell-wide
    spreadsheet and, for every project found among the samples, to a
    project-specific ``<project>_sequencing_results`` spreadsheet whose
    "Summary" worksheet is regenerated afterwards.

    Args:
        fcdir: Path to the run folder to collect metrics from.
        credentials_file: Passed to ``google.get_credentials``.
        gdocs_folder: Passed to ``SpreadSheet.move_to_folder`` for every
            spreadsheet that is touched.

    Returns:
        A dict with the keys 'stdout', 'stderr' and 'debug', each holding a
        fresh ``StringIO``. This function itself never writes to them. The
        same dict is returned early (after logging an error) when the run
        folder is missing or the credentials cannot be obtained.
    """
    output_data = {
        stream: StringIO() for stream in ('stdout', 'stderr', 'debug')
    }

    # Guard clauses: nothing can be uploaded without a folder and credentials
    if not os.path.exists(fcdir):
        folder_name = os.path.basename(fcdir)
        LOG.error("The run folder, {} does not exist!".format(folder_name))
        return output_data

    credentials = google.get_credentials(credentials_file)
    if credentials is None:
        LOG.error("Could not parse the Google Docs credentials")
        return output_data

    metrics = collect_metrics(fcdir)
    samples = _format_samples(metrics)
    run_info = metrics['RunInfo']

    # The flowcell-wide spreadsheet name is derived from the run date
    flowcell_title = _demultiplex_spreadsheet(run_info.get('Date'))
    flowcell_sheet = SpreadSheet(credentials, flowcell_title)
    flowcell_sheet.move_to_folder(gdocs_folder)

    # The worksheet is named after the first and last "_"-separated parts
    # of the run id
    id_parts = run_info['Id'].split("_")
    wsheet_name = "_".join((id_parts[0], id_parts[-1]))

    # Metrics for the entire flowcell
    write_flowcell_metrics(samples, flowcell_sheet, wsheet_name)

    # Project-centered metrics, one spreadsheet per distinct project name
    ignored_projects = ('Undetermined_indices', '')
    project_names = set(entry.get('Project name', '') for entry in samples)
    for project in project_names:
        if project in ignored_projects:
            continue

        # Pick out this project's samples, tagging each with the run name
        # as its description
        project_samples = []
        for entry in samples:
            if entry.get('Project name', '') == project:
                entry['Description'] = wsheet_name
                project_samples.append(entry)

        project_sheet = SpreadSheet(
            credentials, "{}_sequencing_results".format(project))
        project_sheet.move_to_folder(gdocs_folder)

        # Empty the summary first so that it cannot show stale information
        # if the upload below fails
        write_flowcell_metrics([], project_sheet, "Summary")
        write_flowcell_metrics(project_samples, project_sheet, wsheet_name)

        # Rebuild the summary over all worksheets in the project
        write_flowcell_metrics(
            summarize_project(project_sheet), project_sheet, "Summary")

    return output_data
def main(project_name, conf, cred):
    """Create a BP_RNA document for a project and save it to the analysis db.

    Args:
        project_name: Name of the project; used to build the document and to
            look up an existing entry in the 'analysis' database.
        conf: Configuration handed to ``cl.load_config`` and
            ``load_couch_server``.
        cred: Handed to ``get_credentials``.
    """
    credentials = get_credentials(cred)
    # NOTE(review): neither the spreadsheet client nor the parsed config is
    # used below; the calls are kept in case they have side effects.
    client = SpreadSheet(credentials)
    config = cl.load_config(conf)

    analysis_db = load_couch_server(conf)['analysis']

    document = DB.BP_RNA(project_name)
    # Reuse the id found through the view, or have a new one made
    existing_key = find_proj_from_view(analysis_db, project_name)
    document.obj['_id'] = find_or_make_key(existing_key)

    save_status = save_couchdb_obj(analysis_db, document.obj)
    message = 'project %s %s : _id = %s' % (
        project_name, save_status, document.obj['_id'])
    LOG.info(message)
Beispiel #3
0
    def __init__(self, project_name, config):
        """Initialize the object from the project's row in the projects worksheet.

        Every attribute named by ``self.column_mapping()`` is first set to
        ``None``. The worksheets listed in the configuration are then searched
        in order, and the first row whose project-name column equals
        ``project_name`` is used to populate all mapped attributes. If no row
        matches, the attributes stay ``None``.

        Args:
            project_name: Value to match against the column mapped to the
                "project_name" attribute.
            config: Mapping with a "gdocs" (or, as fallback, "gdocs_upload")
                section providing "credentials_file" (or "gdocs_credentials"),
                "projects_spreadsheet" and "projects_worksheet". The worksheet
                entry may hold several comma-separated worksheet titles.

        Raises:
            AssertionError: If the credentials cannot be obtained or the
                spreadsheet/worksheet names are missing from the configuration.
        """

        # Map internal attribute names to the GPL column headers
        col_mapping = self.column_mapping()
        for attr in col_mapping:
            setattr(self, attr, None)

        # Get the name of the spreadsheet where uppnex ids can be found
        gdocs_config = config.get("gdocs", config.get("gdocs_upload", {}))
        cred_file = gdocs_config.get(
            "credentials_file", gdocs_config.get("gdocs_credentials"))
        ssheet_title = gdocs_config.get("projects_spreadsheet")
        wsheet_title = gdocs_config.get("projects_worksheet")

        # Validate with explicit raises rather than `assert`, which is
        # stripped when Python runs with -O. AssertionError is kept so that
        # existing callers see the same exception type.
        credentials = get_credentials(cred_file)
        if credentials is None:
            raise AssertionError(
                "The Google Docs credentials could not be found.")
        if ssheet_title is None or wsheet_title is None:
            raise AssertionError(
                "The names of the projects spreadsheet and worksheet on "
                "Google Docs could not be found.")

        # Connect to the spread- and worksheet
        ssheet = SpreadSheet(credentials, ssheet_title)
        if ssheet is None:
            raise AssertionError(
                "Could not fetch '{}' from Google Docs.".format(ssheet_title))

        # We allow multiple, comma-separated worksheets to be searched
        for wtitle in wsheet_title.split(','):
            wtitle = wtitle.strip()
            wsheet = ssheet.get_worksheet(wtitle)
            if not wsheet:
                # Report the specific worksheet that is missing, not the
                # whole comma-separated list
                print("WARNING: Could not locate {} in {}".format(
                    wtitle, ssheet_title))
                continue

            # Get the rows for the project
            rows = ssheet.get_cell_content(wsheet)
            # NOTE(review): the header is fetched but never used; the call is
            # kept in case it has side effects -- confirm and remove.
            ssheet.get_header(wsheet)
            # The "- 1" presumably converts a 1-based column number into a
            # 0-based row index -- TODO confirm against get_column_index.
            column_indexes = {
                attr: ssheet.get_column_index(wsheet, col) - 1
                for attr, col in col_mapping.items()
            }
            for row in rows:
                # skip if this is not the project we're interested in
                if row[column_indexes["project_name"]] != project_name:
                    continue

                # Will only use the first result found to set each attribute
                for attr, index in column_indexes.items():
                    setattr(self, attr, row[index])

                # We have found the project data so stop iterating
                return
def get_20158_info(credentials, project_name_swe):
    """Collect per-sample status and read counts from a project's 20158 sheet.

    Searches the spreadsheet feed for ``project_name_swe + '_20158'`` and
    reads the first hit. The document version is parsed from the text that
    follows '_20158_' in the spreadsheet title and selects which column
    headers to look for.

    Args:
        credentials: Credentials handed to ``SpreadSheet``.
        project_name_swe: Project name used to build the search string.

    Returns:
        ``None`` if the feed has no entries. Otherwise a dict mapping each
        sample name (after ``strip_index``) to ``[status, no_reads]``, both
        as stripped strings. For versions "05" and "06" the status is taken
        from the duplicate-removal column when that cell is non-empty, else
        from the regular status column. Rows that cannot be parsed are
        skipped.

    Raises:
        IndexError: If the spreadsheet title does not contain '_20158_'.
        KeyError: If the parsed version is not one of the known versions.
    """
    # Per version: [sample name header, read count header, worksheet name,
    # status header, (05/06 only) status header after duplicate removal]
    versions = {
        "01": [
            'Sample name Scilife', "Total reads per sample", "Sheet1",
            "Passed=P/ not passed=NP*"
        ],
        "02": [
            "Sample name (SciLifeLab)", "Total number of reads (Millions)",
            "Sheet1",
            "Based on total number of reads after mapping and duplicate removal"
        ],
        "03": [
            "Sample name (SciLifeLab)", "Total number of reads (Millions)",
            "Sheet1",
            "Based on total number of reads after mapping and duplicate removal "
        ],
        "05": [
            "Sample name (from Project read counts)", "Total number", "Sheet1",
            "Based on total number of reads",
            "Based on total number of reads after mapping and duplicate removal"
        ],
        "06": [
            "Sample name (from Project read counts)", "Total number", "Sheet1",
            "Based on total number of reads",
            "Based on total number of reads after mapping and duplicate removal"
        ]
    }

    client = SpreadSheet(credentials)
    feed = client.get_spreadsheets_feed(project_name_swe + '_20158', False)
    if len(feed.entry) == 0:
        return None

    ssheet = feed.entry[0].title.text
    version = ssheet.split(str('_20158_'))[1].split(' ')[0].split('_')[0]
    client = SpreadSheet(credentials, ssheet)
    layout = versions[version]
    content, _ws_key, _ss_key = get_google_document(ssheet, layout[2], client)

    # Only the row index of the sample-name header is needed; rows below it
    # are treated as data rows.
    _unused, status_col = get_column(content, layout[3])
    _unused, reads_col = get_column(content, layout[1])
    header_row, name_col = get_column(content, layout[0])
    if version in ("05", "06"):
        _unused, duprem_col = get_column(content, layout[4])
    else:
        # '' marks "no duplicate-removal column for this version"
        duprem_col = ''

    info = {}
    for row_number, row in enumerate(content):
        if row_number <= header_row:
            continue
        try:
            sci_name = str(row[name_col]).strip()
            stripped_name = strip_index(sci_name)
            no_reads = str(row[reads_col]).strip()
            if duprem_col != '' and str(row[duprem_col]).strip() != '':
                status = str(row[duprem_col]).strip()
            else:
                status = str(row[status_col]).strip()
            info[stripped_name] = [status, no_reads]
        except Exception:
            # Best effort: skip rows that cannot be parsed (e.g. too short),
            # but no longer swallow KeyboardInterrupt/SystemExit as the
            # previous bare `except:` did.
            continue
    return info