コード例 #1
0
def main():
    """Program
    """
    parser = get_parser()
    args = parser.parse_args()
    mode = args.dcc_mode
    exp_id = args.exp_id
    bio_rep_num = args.bio_rep_num
    tech_rep_num = args.tech_rep_num

    conn = Connection(mode)
    rep_dico = conn.get_fastqfile_replicate_hash(exp_id)

    for b in rep_dico:
        if bio_rep_num and b != bio_rep_num:
            continue
        for t in rep_dico[b]:
            if tech_rep_num and t != tech_rep_num:
                continue
            for read_num in rep_dico[b][t]:
                for fastq_json in rep_dico[b][t][read_num]:
                    alias = fastq_json["aliases"][0]
                    print("_".join([str(b), str(t),
                                    str(read_num)]) + "\t" + alias)
コード例 #2
0
def main():
    """Program
    """
    parser = get_parser()
    args = parser.parse_args()
    mode = args.dcc_mode
    exp_id = args.exp_id

    conn = Connection(mode)
    exp_rep_dico = conn.get_fastqfile_replicate_hash(exp_id)
    exp_json = conn.get(exp_id, ignore404=True)
    controls = exp_json["possible_controls"]  # A list of dicts.

    # Populate a controls-lookup hash. The keys will be the ctl accessions. Each value will be
    # the replicates hash (return value of conn.get_fastqfile_replicate_hash().
    controls_hash = {}  # A dict of dicts.
    control_bio_rep_counts = []
    for c in controls:
        ctl_accession = c["accession"]
        controls_hash[ctl_accession] = {}
        ctl_rep_dico = conn.get_fastqfile_replicate_hash(ctl_accession)
        controls_hash[ctl_accession]["rep_dico"] = ctl_rep_dico
        control_bio_rep_counts.append(len(ctl_rep_dico.keys()))

    # Make sure that all control experiments have the same number of biological replicates. There are
    # no known rules to apply otherwise.
    if len(set(control_bio_rep_counts)) != 1:
        raise Exception(
            "The controls '{controls}' have different numbers of biological replicates from one another '{rep_nums}'."
            .format(controls=control_ids, rep_nums=control_bio_rep_counts))

    # Make sure that the number of control bio reps equals the number of experiment bio reps:
    exp_bio_rep_count = len(exp_rep_dico.keys())
    if exp_bio_rep_count != control_bio_rep_counts[0]:
        raise Exception(
            "The number of experiment replicates '{}' doesn't equal the number of control replicates '{}'."
            .format(exp_bio_rep_count, control_bio_rep_counts[0]))

    # Now we'll look at each bio rep on the experiment, in numerical order of
    # biological_replicate_number from least to greatest. We'll work our way all the down to the
    # FASTQ files and start populating the File.controlled_by property in the following manner:
    #
    #  For each control, we'll sort the replicates the same was as we did for the ones on the
    #  experiment, then for the replicate having the same ordinal index, we'll add the FASTQ File
    #  references.

    sorted_exp_bio_reps = sorted(exp_rep_dico)
    count = -1
    # And now for the nastiest for-loop I've ever written ... this should be cleaned up but the logic
    # is so rough to implement that it'll be ugly any way we look at it.
    for b in sorted_exp_bio_reps:  # biological_replicate_number
        count += 1
        for t in exp_rep_dico[b]:  # technical_replicate_number
            for read_num in exp_rep_dico[b][t]:
                for fastq_json in exp_rep_dico[b][t][read_num]:
                    exp_file_acc = fastq_json["accession"]
                    controlled_by = []
                    for c in controls_hash:
                        ctl_bio_rep_num = sorted(
                            controls_hash[c]["rep_dico"])[count]
                        ctl_tech_reps = controls_hash[c]["rep_dico"][
                            ctl_bio_rep_num]
                        for ctl_tech_rep_num in ctl_tech_reps:
                            for ctl_encff in ctl_tech_reps[ctl_tech_rep_num][
                                    read_num]:
                                controlled_by.append(ctl_encff["accession"])
                    conn.patch(
                        {
                            conn.ENCID_KEY: exp_file_acc,
                            "controlled_by": controlled_by
                        },
                        extend_array_values=False)