def validate_chipseq_experiments(self):
    """
    Validate every ChipseqExperiment whose ID is in ``self.chipseq_experiment_ids``.

    Each problem found is written to ``self.fout`` as one line made of the
    experiment ID, a tab, and a message. The checks, in order, are:

    1. The experiment's target has a non-blank ``upstream_identifier``.
    2. The experiment has replicates; each replicate biosample is then passed to
       ``self.validate_gm_for_crispr_biosample``.
    3. The experiment has a wild-type control (``wild_type_control_id``).
    4. The experiment has paired-input controls (``control_replicate_ids``); each
       one is also passed to ``self.validate_gm_for_crispr_biosample``.

    Side effects:
        Appends newly seen unregistered target IDs to ``self.unregistered_targets``
        and closes ``self.fout`` when done, even if a lookup raises part-way through.
    """

    def report(exp_label, msg):
        # One tab-delimited line per problem: <experiment id>, <message>.
        self.fout.write(exp_label + "\t" + msg + "\n")

    try:
        for rec_id in self.chipseq_experiment_ids:
            exp = models.ChipseqExperiment(rec_id)
            str_exp_id = str(exp.id)

            target = models.Target(exp.target_id)
            # A missing identifier may come back as None rather than "", so guard
            # before stripping; the sibling GM check also relies on plain truthiness.
            target_upstream = (target.upstream_identifier or "").strip()
            if not target_upstream:
                # NOTE(review): each unregistered target is reported only the first
                # time it is seen -- confirm this matches the intended report layout.
                if target.id not in self.unregistered_targets:
                    self.unregistered_targets.append(target.id)
                    report(str_exp_id, "Target {} not registred.".format(target.id))

            exp_rep_ids = exp.replicate_ids
            if not exp_rep_ids:
                report(str_exp_id, "ChipseqExperiment missing replicates.")
            else:
                for rep_id in exp_rep_ids:
                    self.validate_gm_for_crispr_biosample(chip_exp_id=exp.id, biosample_id=rep_id)

            # Check WT input present
            if not exp.wild_type_control_id:
                report(str_exp_id, "ChipseqExperiment missing WT control.")

            # Check Paired input present and has GM
            pi_ids = exp.control_replicate_ids
            if not pi_ids:
                report(str_exp_id, "ChipseqExperiment missing paired input.")
            else:
                for pi_id in pi_ids:
                    self.validate_gm_for_crispr_biosample(chip_exp_id=exp.id, biosample_id=pi_id)
    finally:
        # Always release the report file handle, including on failure.
        self.fout.close()
def main():
    """
    Create or patch Target records from a list of upstream target records.

    The input records come either from ``euc.Connection("prod").search(url=...)``
    when ``--url`` is given, or from a JSON file (``infile``) otherwise. Records
    whose organism scientific name differs from ``SPECIES`` are skipped.

    For each remaining record, the Target is looked up by name (the record's
    ``label``):

    * no existing Target           -> a new one is POSTed;
    * existing, different upstream -> the existing one is PATCHed;
    * existing, same upstream      -> skipped.

    Raises:
        Exception: If no user with first name "Admin" exists; that user's ID is
            set as ``user_id`` on every payload.
    """
    parser = get_parser()
    args = parser.parse_args()
    url = args.url
    infile = args.infile
    if url:
        conn = euc.Connection("prod")
        results = conn.search(url=url)
    else:
        # Context manager so the input file is closed once parsed.
        with open(infile) as fh:
            results = json.load(fh)

    admin = models.User.find_by({"first_name": "Admin"})
    if not admin:
        raise Exception("Could not find the Admin user in the database, which is needed for associating with new records.")

    # Maps a dbxref prefix to the payload field that stores its accession.
    xref_fields = {
        "ENSEMBL": "ensembl",
        "UniProtKB": "uniprotkb",
        "RefSeq": "refseq",
    }

    created = 0
    patched = 0
    total = 0
    for rec in results:
        patch = False
        total += 1
        organism = rec["organism"]["scientific_name"]
        if organism != SPECIES:
            continue
        payload = {}
        label = rec["label"]
        payload["name"] = label
        # Check if the target already exists in the database.
        pulsar_record = models.Target.find_by({"name": label})
        # The upstream identifier is the last path component of the record's "@id".
        upstream = rec["@id"].strip("/").split("/")[-1]
        if pulsar_record and upstream != pulsar_record["upstream_identifier"]:
            patch = True
        elif pulsar_record:
            # Already present with the same upstream identifier.
            continue  # Can add support for patch operation later.
        payload["upstream_identifier"] = upstream
        payload["user_id"] = admin["id"]
        for xref in rec["dbxref"]:
            # Split on the last colon only, so prefixes containing ":" stay intact.
            prefix, accession = xref.rsplit(":", 1)
            field = xref_fields.get(prefix)
            if field is not None:
                payload[field] = accession
        # Printed for both the create and the patch path.
        print("Creating {}".format(payload))
        if patch:
            target = models.Target(pulsar_record["id"])
            target.patch(payload)
            patched += 1
            print("Patched: {}".format(patched))
        else:
            models.Target.post(payload)
            created += 1
            print("Created: {}".format(created))
    print("Total processed: {}".format(total))
def validate_gm_for_crispr_biosample(self, chip_exp_id, biosample_id):
    """
    Check that a biosample has a CRISPR genetic modification (GM) with registered targets.

    Problems are written to ``self.fout`` as one line made of ``chip_exp_id``, a
    tab, and a message:

    * the biosample has no ``crispr_modification_id``;
    * the target of the GM's donor construct, or of any of its CRISPR constructs,
      has no ``upstream_identifier`` and is not yet in ``self.unregistered_targets``
      (its ID is then appended to that list).

    Args:
        chip_exp_id: ID of the ChipseqExperiment the biosample belongs to; only
            used to label report lines.
        biosample_id: ID of the Biosample record to inspect.
    """

    def emit(message):
        self.fout.write(str(chip_exp_id) + "\t" + message + "\n")

    def flag_if_unregistered(target):
        # Nothing to do for registered targets or ones already recorded.
        if target.upstream_identifier:
            return
        if target.id in self.unregistered_targets:
            return
        self.unregistered_targets.append(target.id)
        emit("Target {} not registred.".format(target.id))

    sample = models.Biosample(biosample_id)
    modification_id = sample.crispr_modification_id
    if not modification_id:
        emit("Exp. biosample {} missing GM.".format(sample.id))
        return

    # Verify that GM target is present.
    modification = models.CrisprModification(modification_id)
    donor = models.DonorConstruct(modification.donor_construct_id)
    flag_if_unregistered(models.Target(donor.target_id))

    # Load every CRISPR construct first, then check each construct's target.
    constructs = [models.CrisprConstruct(cc_id) for cc_id in modification.crispr_construct_ids]
    for construct in constructs:
        flag_if_unregistered(models.Target(construct.target_id))