Esempio n. 1
0
 def test_dry_run_enabled(self):
     """
     Checks that ``check_dry_run`` returns True for a ``Connection`` that was created with
     ``dry_run=True``.
     """
     connection = Connection(eu.DCC_DEV_MODE, dry_run=True, no_log_file=True)
     self.conn = connection
     dry_run_state = connection.check_dry_run()
     self.assertEqual(True, dry_run_state)
Esempio n. 2
0
def main():
    """Append each record's aliases to its line of a tab-delimited file.

    The first tab-delimited field of every line in ``args.infile`` is used as a
    record identifier and looked up through ``Connection.get``. The original
    line, followed by one extra tab-delimited field per alias found in the
    record's ``aliases`` property, is written to ``args.outfile``.

    Lines whose first field is empty or starts with ``#`` are copied to the
    output unchanged. A record with no aliases is written unchanged as well, so
    the output keeps one line per input line.

    Raises:
        KeyError: If a fetched record has no ``aliases`` property.
    """
    parser = get_parser()
    args = parser.parse_args()

    conn = Connection(dcc_mode=args.dcc_mode)

    # Context managers guarantee both files are closed even when a lookup fails.
    with open(args.infile, 'r') as fh, open(args.outfile, 'w') as fout:
        for line in fh:
            stripped = line.strip("\n")
            rec_id = stripped.split("\t")[0]
            if not rec_id or rec_id.startswith("#"):
                fout.write(line)
                continue
            rec = conn.get(rec_ids=rec_id, ignore404=False)
            # Write the line exactly once; the previous per-alias loop rebound
            # ``line`` to a list and failed on the second alias.
            fields = [stripped]
            fields.extend(rec["aliases"])
            fout.write("\t".join(fields) + "\n")
 def __init__(self, steps, metadata_json, server, lab, award):
     """Set up the state needed by an ``Accession`` instance.

     The statements run in a fixed order and call helpers that are defined
     outside this view; later calls may rely on attributes assigned earlier, so
     check those helpers before reordering anything.

     Args:
         steps: Passed to ``self.file_to_json``; presumably the path of a JSON
             file describing steps and parameters -- confirm against that helper.
         metadata_json: Passed to the ``Analysis`` constructor.
         server: Passed to the ``Connection`` constructor.
         lab: Forwarded to ``self.set_lab_award``.
         award: Forwarded to ``self.set_lab_award``.
     """
     super(Accession, self).__init__()
     self.set_lab_award(lab, award)
     self.analysis = Analysis(metadata_json)
     self.steps_and_params_json = self.file_to_json(steps)
     # The backend is taken from the Analysis object created above.
     self.backend = self.analysis.backend
     self.conn = Connection(server)
     # Starts empty; NOTE(review): presumably filled as files are accessioned -- confirm.
     self.new_files = []
     # NOTE(review): get_current_user() may need self.conn, which is why it runs last -- confirm.
     self.current_user = self.get_current_user()
Esempio n. 4
0
def main():
    """Report the FASTQ files that have a status of "content error".

    The first tab-delimited field of every line in ``args.infile`` is a record
    identifier that must resolve to an object whose profile name is either
    ``experiment`` or ``file``. Lines whose first field is empty or starts with
    ``#`` are skipped.

    For an experiment, the FASTQ file records returned by
    ``Connection.get_fastqfiles_on_exp`` are checked; for a file, the record
    itself is checked. Each record whose ``status`` is ``content error``
    produces one tab-delimited line in ``args.outfile``: the experiment
    accession, the input identifier, and the record's
    ``content_error_detail``.

    Raises:
        Exception: If an identifier resolves to an object of any other profile.
    """
    EXP_PROFILE_ID = "experiment"
    FILE_PROFILE_ID = "file"
    VALID_PROFILES = [EXP_PROFILE_ID, FILE_PROFILE_ID]
    parser = get_parser()
    args = parser.parse_args()

    conn = Connection(args.dcc_mode)

    # Context managers close both files even when the profile check raises.
    with open(args.infile, 'r') as fh, open(args.outfile, 'w') as fout:
        for line in fh:
            rec_id = line.strip("\n").split("\t")[0]
            if not rec_id or rec_id.startswith("#"):
                continue
            rec = conn.get(rec_id, ignore404=False)
            profile_id = conn.profiles.get_profile_from_id(rec["@id"]).name
            if profile_id not in VALID_PROFILES:
                raise Exception(
                    "Record identifier '{}' must be an identifer for an object of a type in the set {}."
                    .format(rec_id, VALID_PROFILES))

            if profile_id == EXP_PROFILE_ID:
                # List of FASTQ file objects in JSON format.
                fastq_recs = conn.get_fastqfiles_on_exp(rec_id)
                exp_accession = rec["accession"]
            else:
                # The record fetched above already is the file; no second GET needed.
                fastq_recs = [rec]
                # The experiment accession is the last path segment of "dataset".
                exp_accession = rec["dataset"].split("/")[-1]

            for fq_rec in fastq_recs:
                if fq_rec["status"] != "content error":
                    continue
                fout.write(
                    "\t".join([exp_accession, rec_id,
                               fq_rec["content_error_detail"]]) + "\n")
Esempio n. 5
0
def main():
    """Append the DCC identifier of each alias to its line of a tab-delimited file.

    The first tab-delimited field of every line in ``args.infile`` is treated as
    a record alias. An alias without a ``:`` gets the submitting lab prepended
    (``args.submitter_lab``, or ``encode_utils.LAB_PREFIX`` without its trailing
    colon when that argument is not given). The record is then fetched and the
    original line, plus one extra field holding the record's ``accession`` (or
    its ``uuid`` when there is no ``accession`` key), is written to
    ``args.outfile``.

    Lines whose first field is empty or starts with ``#`` are copied to the
    output unchanged.

    Raises:
        Exception: If an alias has no lab prefix and no submitting lab is known.
    """
    parser = get_parser()
    args = parser.parse_args()
    submitter_lab = args.submitter_lab
    if not submitter_lab:
        submitter_lab = encode_utils.LAB_PREFIX.rstrip(":")

    conn = Connection(dcc_mode=args.dcc_mode)

    # Context managers close both files even when a lookup raises.
    with open(args.infile, 'r') as fh, open(args.outfile, 'w') as fout:
        for line in fh:
            stripped = line.strip("\n")
            alias = stripped.split("\t")[0]
            if not alias or alias.startswith("#"):
                fout.write(line)
                continue
            if ":" not in alias:
                # No lab prefix on the alias; fall back to the submitting lab.
                if not submitter_lab:
                    raise Exception(
                        "Unknown submitting lab name for alias {}. See description for --submitter-lab  argument."
                        .format(alias))
                alias = submitter_lab + ":" + alias
            rec = conn.get(rec_ids=alias, ignore404=False)
            try:
                dcc_id = rec["accession"]
            except KeyError:
                # The record has no "accession" key; use its "uuid" instead.
                dcc_id = rec["uuid"]
            fout.write("\t".join([stripped, dcc_id]) + "\n")
def main():
    """Write out the record identifiers that cannot be found.

    Every line of ``args.infile`` is stripped of surrounding whitespace and used
    as a record identifier; empty lines and lines starting with ``#`` are
    skipped. Each identifier is looked up with ``ignore404=True``; when the
    lookup returns a falsy value, a message is printed and the identifier is
    written on its own line to ``args.outfile``.
    """
    parser = get_parser()
    args = parser.parse_args()

    conn = Connection(args.dcc_mode)

    # Context managers close both files even when a lookup raises.
    with open(args.infile, 'r') as fh, open(args.outfile, 'w') as fout:
        for line in fh:
            rec_id = line.strip()
            if not rec_id or rec_id.startswith("#"):
                continue
            if conn.get(rec_id, ignore404=True):
                continue
            print("'{}' not found.".format(rec_id))
            fout.write(rec_id + "\n")
Esempio n. 7
0
def main():
    """Print the first alias of each FASTQ file on an experiment.

    The replicate hash returned by ``Connection.get_fastqfile_replicate_hash``
    is walked by biological replicate, technical replicate and read number.
    When ``args.bio_rep_num`` or ``args.tech_rep_num`` is set, only the matching
    replicates are reported. Each output line has the form
    ``<bio>_<tech>_<read>`` followed by a tab and the file's first alias.
    """
    args = get_parser().parse_args()
    wanted_bio_rep = args.bio_rep_num
    wanted_tech_rep = args.tech_rep_num

    conn = Connection(args.dcc_mode)
    replicates = conn.get_fastqfile_replicate_hash(args.exp_id)

    for bio_rep in replicates:
        if not wanted_bio_rep or bio_rep == wanted_bio_rep:
            for tech_rep in replicates[bio_rep]:
                if not wanted_tech_rep or tech_rep == wanted_tech_rep:
                    for read_num in replicates[bio_rep][tech_rep]:
                        label = "_".join(
                            map(str, (bio_rep, tech_rep, read_num)))
                        for fastq_json in replicates[bio_rep][tech_rep][
                                read_num]:
                            first_alias = fastq_json["aliases"][0]
                            print(label + "\t" + first_alias)
Esempio n. 8
0
 def test_arbitrary_host(self):
     """Checks that a ``Connection`` can be created with a bare host name as ``dcc_mode``."""
     host = 'test.encodedcc.org'
     self.conn = Connection(no_log_file=True, dcc_mode=host)
Esempio n. 9
0
 def setUp(self):
     """Creates the ``Connection`` stored on ``self.conn``, using ``eu.DCC_DEV_MODE``."""
     dev_mode = eu.DCC_DEV_MODE
     self.conn = Connection(dev_mode, no_log_file=True)
Esempio n. 10
0
def test_connection_dcc_mode_https_url(mocker):
    """Check that an https URL given as the DCC mode is exposed as ``dcc_mode.url``."""
    mocker.patch("requests.get")
    connection = Connection("https://www.foo.bar", no_log_file=True)
    observed_url = connection.dcc_mode.url
    assert observed_url == "https://www.foo.bar"
Esempio n. 11
0
def main():
    """Upload knockdown barplots as documents and link them to libraries.

    ``args.infile`` is a tab-delimited file whose first line is a field-header
    line starting with ``#``. On every following non-blank line, field 1 is an
    experiment identifier, field 2 a replicate identifier and field 3 the local
    path of a barplot image.

    For each library (found through the replicate's ``library.accession``) the
    barplot is posted as a document and linked to the library. The input line,
    extended with the value returned by ``post_document``, is written to
    ``OUTPUT_FILE``. Finally the document ``args.protocol_uuid`` is linked to
    every experiment seen in the input.

    Raises:
        Exception: If the first line of the input file does not start with ``#``.
    """
    parser = get_parser()
    args = parser.parse_args()
    dcc_mode = args.dcc_mode
    infile = args.infile
    protocol_uuid = args.protocol_uuid

    # connect to DCC
    conn = Connection(dcc_mode)

    barplot_description = "Barplot showing the expression of the given gene in the control vs. the treatment. Expression is given in Transcripts Per Million (TPM) and was generated by version 1.2.30 of RSEM's rsem-calculate-expression script."
    # key: library accession, value: {barplot: local_barplot_path, line: fields_from_input_file}
    dico = {}
    # Store all experiment IDs seen in the input file so the analysis protocol
    # document can be linked to each of them later.
    exp_encids = []
    with open(infile, 'r') as fh:
        header = fh.readline().strip("\n")
        if not header.startswith("#"):
            raise Exception(
                "First line of input file must be a field-header line starting with a '#'."
            )
        for line in fh:
            line = line.strip("\n")
            if not line.strip():
                continue
            fields = line.split("\t")
            dcc_exp_id = fields[0].strip()
            if dcc_exp_id not in exp_encids:
                exp_encids.append(dcc_exp_id)
            dcc_rep_id = fields[1].strip()
            # Look up the replicate named on this line (the name used here was
            # previously undefined).
            rep_json = conn.get(dcc_rep_id, ignore404=False)
            dcc_lib_id = rep_json["library"]["accession"]
            barplot = fields[2].strip()
            dico[dcc_lib_id] = {"barplot": barplot, "line": fields}

    # NOTE(review): OUTPUT_FILE is not defined inside this function; it is
    # assumed to be a module-level constant -- confirm.
    with open(OUTPUT_FILE, 'w') as fout:
        fout.write(header + "\tjpeg_dcc_uuid\n")
        for lib_id, entry in dico.items():
            # download_filename is the name the user will get when they download
            # the file from the ENCODE Portal.
            download_filename = lib_id + "_relative_knockdown.jpeg"
            dcc_uuid = conn.post_document(
                download_filename=download_filename,
                document=entry["barplot"],
                document_type="data QA",
                document_description=barplot_description)
            fout.write("\t".join(entry["line"] + [dcc_uuid]) + "\n")
            # link document to library
            conn.link_document(rec_id=lib_id, dcc_document_uuid=dcc_uuid)

    print(
        "Linking RSEM analysis and plotting protocol document to each experiment"
    )
    for exp in exp_encids:
        # Use the same keyword as the link_document() call above.
        conn.link_document(rec_id=exp, dcc_document_uuid=protocol_uuid)
Esempio n. 12
0
def main():
    """Set ``controlled_by`` on every FASTQ file of an experiment.

    The experiment's ``possible_controls`` are fetched together with their
    FASTQ replicate hashes (``Connection.get_fastqfile_replicate_hash``). All
    controls must have the same number of biological replicates, and that number
    must equal the experiment's.

    Biological replicates are paired by ordinal position after sorting the
    replicate numbers: the n-th experiment replicate is controlled by the n-th
    replicate of every control. For each experiment FASTQ file, the accessions
    of all control FASTQ files in the paired replicate with the same read number
    are collected, and the file is patched with that list as ``controlled_by``.

    Raises:
        Exception: If the experiment has no possible controls, if the controls
            disagree on their biological replicate count, or if that count
            differs from the experiment's.
    """
    parser = get_parser()
    args = parser.parse_args()
    exp_id = args.exp_id

    conn = Connection(args.dcc_mode)
    exp_rep_dico = conn.get_fastqfile_replicate_hash(exp_id)
    exp_json = conn.get(exp_id, ignore404=True)
    controls = exp_json["possible_controls"]  # A list of dicts.
    if not controls:
        # Without this check the replicate-count test below would report a
        # misleading "different numbers" error for an empty control list.
        raise Exception(
            "Experiment '{}' has no possible controls.".format(exp_id))

    # Control accession -> replicate hash of that control.
    control_rep_dicos = {}
    for c in controls:
        ctl_accession = c["accession"]
        control_rep_dicos[ctl_accession] = conn.get_fastqfile_replicate_hash(
            ctl_accession)
    control_ids = list(control_rep_dicos)
    control_bio_rep_counts = [
        len(control_rep_dicos[ctl]) for ctl in control_ids
    ]

    # Make sure that all control experiments have the same number of biological
    # replicates. There are no known rules to apply otherwise.
    if len(set(control_bio_rep_counts)) != 1:
        raise Exception(
            "The controls '{controls}' have different numbers of biological replicates from one another '{rep_nums}'."
            .format(controls=control_ids, rep_nums=control_bio_rep_counts))

    # Make sure that the number of control bio reps equals the number of
    # experiment bio reps.
    exp_bio_rep_count = len(exp_rep_dico)
    if exp_bio_rep_count != control_bio_rep_counts[0]:
        raise Exception(
            "The number of experiment replicates '{}' doesn't equal the number of control replicates '{}'."
            .format(exp_bio_rep_count, control_bio_rep_counts[0]))

    # Sort each control's biological replicate numbers once, rather than for
    # every experiment file.
    sorted_ctl_bio_reps = {
        ctl: sorted(rep_dico) for ctl, rep_dico in control_rep_dicos.items()
    }

    # Walk the experiment's biological replicates in ascending order of
    # biological_replicate_number and pair each with the control replicate at the
    # same ordinal position.
    for index, b in enumerate(sorted(exp_rep_dico)):
        for t in exp_rep_dico[b]:  # technical_replicate_number
            for read_num in exp_rep_dico[b][t]:
                for fastq_json in exp_rep_dico[b][t][read_num]:
                    controlled_by = []
                    for ctl, ctl_rep_dico in control_rep_dicos.items():
                        ctl_bio_rep_num = sorted_ctl_bio_reps[ctl][index]
                        ctl_tech_reps = ctl_rep_dico[ctl_bio_rep_num]
                        for ctl_tech_rep_num in ctl_tech_reps:
                            # Only control files with the same read number apply.
                            for ctl_encff in ctl_tech_reps[ctl_tech_rep_num][
                                    read_num]:
                                controlled_by.append(ctl_encff["accession"])
                    conn.patch(
                        {
                            conn.ENCID_KEY: fastq_json["accession"],
                            "controlled_by": controlled_by
                        },
                        extend_array_values=False)
Esempio n. 13
0
def _main():
    """Regenerate the AWS upload credentials for the file named on the command line.

    The server URL is read from the ``TEST_DEMO_URL`` environment variable and
    the dry-run flag and file identifier from the parsed arguments.
    """
    cli_args = _parse_args()
    connection = Connection(os.environ['TEST_DEMO_URL'], cli_args.dry_run)
    connection.regenerate_aws_upload_creds(cli_args.file_id)