예제 #1
0
 def parse(self):
     """
     Build the synthetic lethal interactions (SLIs) between EGFR and its curated partner genes.

     The validated EGFR-sensitizing genes from Table 1 of the paper are each paired with EGFR
     (inhibitory antibody against EGFR, siRNA against the partner, A431 cells). One further
     EGFR/AURKA interaction (pharmaceutical perturbation of both genes, HCT 116 cells) is appended.

     :return: list of SyntheticLethalInteraction objects; the AURKA interaction is the last element
     :raises ValueError: if the current symbol of a Table 1 gene is not in ``self.entrez_dict``
     """
     sli_list = []
     egfr = 'EGFR'
     egfr_id = self.get_ncbigene_curie(egfr)
     # Validated EGFR-sensitizing genes, taken from Table 1 of the paper.
     # Note -- LOC63920 and LOC284393 were removed from this list, they could not be identified in HGNC
     table1_symbols = {'ABL1', 'AKT2', 'ANXA6', 'ARF4', 'ARF5', 'ASCL2', 'BCAR1', 'CALM1', 'CBLC', 'CCND1', 'CD59',
                       'CDH3', 'CXCL12', 'DCN', 'DDR2', 'DIXDC1', 'DLG4', 'DUSP4', 'DUSP6', 'DUSP7', 'EPHA5',
                       'ERBB3', 'FER',  'FGFR2', 'FLNA', 'GRB7', 'HSPA9', 'INPPL1', 'KLF10',
                       'LTK', 'MAP3K1', 'MAPK1', 'MATK', 'NEDD9', 'NOTCH2', 'PIK3R1', 'PIK3R2',
                       'PIN1', 'PKN2', 'PLSCR1', 'PPIA', 'PRKACB', 'PRKCD', 'PRKCE', 'PRKCZ', 'PTPRF', 'RAC1',
                       'RAPGEF1', 'RASA3', 'RET', 'RPS6KA5', 'SC4MOL',
                       'SH2D3C', 'SHC1', 'SMAD2', 'SOS2', 'STAT3', 'TBL1Y', 'VAV3'}
     for symbol in table1_symbols:
         # Table 1 may use outdated symbols, so work with the current one
         geneB = self.get_current_symbol(symbol)
         if geneB not in self.entrez_dict:
             # Identify the publication by its PMID so that the message always matches this parser
             raise ValueError("Could not get NCBI id for gene \"%s\" (PMID %s)" % (geneB, self.pmid))
         geneB_id = self.get_ncbigene_curie(geneB)
         sli = SyntheticLethalInteraction(gene_A_symbol=egfr,
                                          gene_A_id=egfr_id,
                                          gene_B_symbol=geneB,
                                          gene_B_id=geneB_id,
                                          gene_A_pert=SlConstants.INHIBITORY_ANTIBODY,
                                          gene_B_pert=SlConstants.SI_RNA,
                                          effect_type=SlConstants.N_A,
                                          effect_size=0,
                                          cell_line=SlConstants.A431_CELL,
                                          cellosaurus_id=SlConstants.A431_CELLOSAURUS,
                                          cancer_type=SlConstants.N_A,
                                          ncit_id=SlConstants.N_A,
                                          assay=SlConstants.CELL_VIABILITY_ASSAY,
                                          pmid=self.pmid,
                                          SL=True)
         sli_list.append(sli)
     # The authors have one additional SLI
     # Analysis based on the Chou-Talalay coefficient of interaction showed that the small-molecule AURKA inhibitor
     # PHA-680632 (29) synergized with erlotinib in reducing cell viability of both A431 and HCT116 cells (Fig. 6B).
     # In HCT116 cells, we found strong synergy (coefficient of interaction values <0.5) between cetuximab and either
     # PHA-680632 or another AURKA inhibitor, C1368
     aurka = 'AURKA'
     aurka_id = self.get_ncbigene_curie(aurka)
     sli = SyntheticLethalInteraction(gene_A_symbol=egfr,
                                      gene_A_id=egfr_id,
                                      gene_B_symbol=aurka,
                                      gene_B_id=aurka_id,
                                      gene_A_pert=SlConstants.PHARMACEUTICAL,
                                      gene_B_pert=SlConstants.PHARMACEUTICAL,
                                      effect_type=SlConstants.N_A,
                                      effect_size=0,
                                      cell_line=SlConstants.HCT_116,
                                      cellosaurus_id=SlConstants.HCT_116_CELLOSAURUS,
                                      cancer_type=SlConstants.N_A,
                                      ncit_id=SlConstants.N_A,
                                      assay=SlConstants.CELL_VIABILITY_ASSAY,
                                      pmid=self.pmid,
                                      SL=True)
     sli_list.append(sli)
     return sli_list
 def parse(self):
     """
     Create the SLIs of BRCA2 and BRCA1 with the genes that the authors investigated in detail.

     POLQ serves as a positive control, while FEN1 and APEX2 represent novel B2SL genes and novel
     potential drug targets in BRCA-deficient tumors. The authors do not concretely name the entire
     list of SLIs, so only these three genes are recorded for BRCA2 (PEO1 cells). FEN1 and APEX2
     are additionally recorded for BRCA1 (BRCA1 isogenic RPE1 cell line).

     :return: list of SyntheticLethalInteraction objects, BRCA2 interactions first, then BRCA1
     """
     def build(gene_a, gene_a_id, partner, cell_line, cellosaurus_id):
         # All interactions share perturbations and assay; only gene A and the cell line differ
         partner_id = self.get_ncbigene_curie(partner)
         return SyntheticLethalInteraction(
             gene_A_symbol=gene_a,
             gene_A_id=gene_a_id,
             gene_B_symbol=partner,
             gene_B_id=partner_id,
             gene_A_pert=SlConstants.LOF_MUTATION,
             gene_B_pert=SlConstants.CRISPR_CAS9,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             cell_line=cell_line,
             cellosaurus_id=cellosaurus_id,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.MULTICOLOR_COMPETITION_ASSAY,
             pmid=self.pmid,
             SL=True)

     brca2 = 'BRCA2'
     brca2_id = self.get_ncbigene_curie(brca2)
     interactions = [
         build(brca2, brca2_id, partner, SlConstants.PEO1_CELL, SlConstants.PEO1_CELLOSAURUS)
         for partner in {'POLQ', 'FEN1', 'APEX2'}
     ]
     brca1 = 'BRCA1'
     brca1_id = self.get_ncbigene_curie(brca1)
     interactions.extend(
         build(brca1, brca1_id, partner, 'BRCA1 isogenic RPE1 cell line', SlConstants.N_A)
         for partner in {'FEN1', 'APEX2'})
     return interactions
예제 #3
0
 def parse(self):
     """
     Create the SLIs of TP53 (degradation) with SGK2 and PAK3 (shRNA) in primary human foreskin keratinocytes.

     :return: list with one SyntheticLethalInteraction per partner gene
     """
     tp53 = 'TP53'
     # NOTE: this parser uses a hard-coded PMID instead of self.pmid
     pmid = '20616055'
     tp53_curie = self.get_ncbigene_curie(tp53)
     return [
         SyntheticLethalInteraction(
             gene_A_symbol=tp53,
             gene_A_id=tp53_curie,
             gene_B_symbol=partner,
             gene_B_id=self.get_ncbigene_curie(partner),
             gene_A_pert=SlConstants.DEGRADATION,
             gene_B_pert=SlConstants.SH_RNA,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             cell_line='primary human foreskin keratinocytes',
             cellosaurus_id=SlConstants.N_A,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.CELL_VIABILITY_ASSAY,
             pmid=pmid,
             SL=True)
         for partner in {'SGK2', 'PAK3'}
     ]
 def parse(self):
     """
     Create the SLIs of CCNE1 (overexpression) with a fixed set of partner genes (shRNA).

     :return: list with one SyntheticLethalInteraction per partner gene
     """
     ccne1 = 'CCNE1'
     ccne1_curie = self.get_ncbigene_curie(ccne1)
     partners = {
         'CDK2', 'ACAT2', 'CSE1L', 'BRCA1', 'CCNA2', 'CDC42', 'CHD2',
         'DDX17', 'DUSP16', 'ENPP2', 'HNRNPA3', 'IARS2', 'MYC', 'PSMA5',
         'RRM1', 'SLC35A3', 'SMC2', 'SPATA6', 'SRBD1', 'TPX2', 'TUBB',
         'UBA1', 'VCP', 'XRCC2'
     }
     return [
         SyntheticLethalInteraction(
             gene_A_symbol=ccne1,
             gene_A_id=ccne1_curie,
             gene_B_symbol=partner,
             gene_B_id=self.get_ncbigene_curie(partner),
             gene_A_pert=SlConstants.OVEREXPRESSION,
             gene_B_pert=SlConstants.SH_RNA,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             # The cell line field is a free-text description, hence no Cellosaurus id
             cell_line='102 cancer cell lines',
             cellosaurus_id=SlConstants.N_A,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.SH_RNA_DEPLETION_ASSAY,
             pmid=self.pmid,
             SL=True)
         for partner in partners
     ]
예제 #5
0
 def parse(self):
     """
     Create the SLIs of TP53 (pharmaceutical) with the positive candidate genes (siRNA) in HCT 116 cells.

     :return: list with one SyntheticLethalInteraction per positive candidate gene
     """
     tp53 = 'TP53'
     tp53_curie = self.get_ncbigene_curie(tp53)
     # Here we take the 18/30 candidate SLNs that displayed <50% relative viability.
     # The correct symbol for DICER is DICER1.
     positives = {'AMFR', 'ATM', 'CAPN9', 'DICER1',  'MACF1', 'MADCAM1', 'MCL1', 'MED21', 'MET',
                  'MON1B', 'PLCB4', 'RAB8B', 'RAD1', 'SRPK1', 'STAU1', 'TGFB2', 'TRPC1', 'VEGFA'}
     return [
         SyntheticLethalInteraction(
             gene_A_symbol=tp53,
             gene_A_id=tp53_curie,
             gene_B_symbol=partner,
             gene_B_id=self.get_ncbigene_curie(partner),
             gene_A_pert=SlConstants.PHARMACEUTICAL,
             gene_B_pert=SlConstants.SI_RNA,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             cell_line=SlConstants.HCT_116,
             cellosaurus_id=SlConstants.HCT_116_CELLOSAURUS,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.CELL_VIABILITY_ASSAY,
             pmid=self.pmid,
             SL=True)
         for partner in positives
     ]
 def create_sli(self, geneB_sym):
     """
     Create a single SLI between TP53 (loss-of-function mutation) and the given gene (siRNA).

     The interaction is annotated with the HCT 116 cell line, colon carcinoma and the RNA
     interference assay.

     :param geneB_sym: symbol of the partner gene; passed to ``self.get_ncbigene_curie``
     :return: the SyntheticLethalInteraction object
     """
     tp53 = 'TP53'
     # NOTE(review): species_id is "10090", which looks like the NCBI taxonomy id of mouse,
     # while all other fields describe the HCT 116 cell line -- confirm the intended taxon
     return SyntheticLethalInteraction(
         gene_A_symbol=tp53,
         species_id="10090",
         gene_A_id=self.get_ncbigene_curie(tp53),
         gene_B_symbol=geneB_sym,
         gene_B_id=self.get_ncbigene_curie(geneB_sym),
         gene_A_pert=SlConstants.LOF_MUTATION,
         gene_B_pert=SlConstants.SI_RNA,
         effect_type="n/a",
         effect_size="n/a",
         cell_line=SlConstants.HCT_116,
         cellosaurus_id=SlConstants.HCT_116_CELLOSAURUS,
         cancer_type=SlConstants.COLON_CARCINOMA,
         ncit_id=SlConstants.COLON_CARCINOMA_NCIT,
         assay=SlConstants.RNA_INTERFERENCE_ASSAY,
         pmid=self.pmid,
         SL=True)
예제 #7
0
 def parse(self):
     """
     Create the SLIs of GSK3B (pharmaceutical) with the cohesin genes RAD21, SMC3 and STAG2 (CRISPR-Cas9).

     Wnt agonist LY2090314, which mimics Wnt activation by inhibiting GSK3-β (Atkinson et al., 2015), emerged as a
     novel class of compound that inhibited the growth of all three cohesin mutants tested. The interactions
     are annotated with the MCF10A cell line.

     :return: list with one SyntheticLethalInteraction per cohesin gene
     """
     gsk3b = 'GSK3B'
     gsk3b_curie = self.get_ncbigene_curie(gsk3b)
     return [
         SyntheticLethalInteraction(
             gene_A_symbol=gsk3b,
             gene_A_id=gsk3b_curie,
             gene_B_symbol=cohesin_gene,
             gene_B_id=self.get_ncbigene_curie(cohesin_gene),
             gene_A_pert=SlConstants.PHARMACEUTICAL,
             gene_B_pert=SlConstants.CRISPR_CAS9,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             cell_line=SlConstants.MCF10A_CELL,
             cellosaurus_id=SlConstants.MCF10A_CELLOSAURUS,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.CELL_VIABILITY_ASSAY,
             pmid=self.pmid,
             SL=True)
         for cohesin_gene in {'RAD21', 'SMC3', 'STAG2'}
     ]
예제 #8
0
 def parse(self):
     """
     Read the tab-separated file ``self.fname`` and create one PIK3CA SLI per row.

     The effect size of each interaction is the mean of the ``PX-866`` and ``NVP-BEZ235``
     columns, recorded as a fold change.

     :return: list of SyntheticLethalInteraction objects in file order
     """
     pik3ca = 'PIK3CA'
     pik3ca_curie = self.get_ncbigene_curie(pik3ca)
     interactions = []
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             partner = self.get_current_symbol(record['Gene symbol'])
             partner_curie = self.get_ncbigene_curie(partner)
             px866 = float(record['PX-866'])
             nvpbez235 = float(record['NVP-BEZ235'])
             # Average the values of the two compound columns
             mean_fold_change = 0.5 * (px866 + nvpbez235)
             interactions.append(SyntheticLethalInteraction(
                 gene_A_symbol=pik3ca,
                 gene_A_id=pik3ca_curie,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=SlConstants.PHARMACEUTICAL,
                 gene_B_pert=SlConstants.SI_RNA,
                 effect_type=SlConstants.FOLD_CHANGE,
                 effect_size=mean_fold_change,
                 cell_line=SlConstants.U87WT_CELL,
                 cellosaurus_id=SlConstants.U87WT_CELLOSAURUS,
                 cancer_type=SlConstants.N_A,
                 ncit_id=SlConstants.N_A,
                 assay=SlConstants.GROWTH_INHIBITION_ASSAY,
                 pmid=self.pmid,
                 SL=True))
     return interactions
예제 #9
0
 def parse(self):
     """
     Append the positive TOP1 SLIs to ``self.sli_list`` and return that list.

     For positives, we take genes with more than half of ≥ 7 siRNAs yielding > 4-fold sensitization (Figure 1b).
     ``self._add_negatives()`` is called first (presumably it fills ``self.sli_list`` with the
     negative examples -- confirm against that helper).

     :return: ``self.sli_list`` after the positive interactions have been appended
     """
     self._add_negatives()
     top1 = 'TOP1'
     top1_curie = self.get_ncbigene_curie(top1)
     # TAB2 is the current symbol for MAP3K7IP2
     for partner in {'ATR', 'TAB2', 'PPP2R1A', 'RNF31', 'TRAF6', 'UPF1', 'USP5'}:
         partner_curie = self.get_ncbigene_curie(partner)
         self.sli_list.append(SyntheticLethalInteraction(
             gene_A_symbol=top1,
             gene_A_id=top1_curie,
             gene_B_symbol=partner,
             gene_B_id=partner_curie,
             gene_A_pert=SlConstants.PHARMACEUTICAL,
             gene_B_pert=SlConstants.SI_RNA,
             effect_type=SlConstants.N_A,
             effect_size=SlConstants.N_A,
             cell_line=SlConstants.MDAMB231_CELL,
             cellosaurus_id=SlConstants.MDAMB231_CELLOSAURUS,
             cancer_type=SlConstants.N_A,
             ncit_id=SlConstants.N_A,
             assay=SlConstants.CELL_VIABILITY_ASSAY,
             pmid=self.pmid,
             SL=True))
     return self.sli_list
예제 #10
0
 def parse(self):
     """
     Parse the tab-separated file ``self.fname`` (named "Srivasa 2016" in the error messages).

     Each row provides a comma-separated list of gene A symbols (column ``geneAlist``), one gene B
     symbol (column ``geneB``) and an effect size (column ``effect``; a decimal comma is accepted).
     One SLI is created for every gene A / gene B combination, except for self loops. The SLIs are
     grouped by gene pair and handed to ``self._mark_maximum_entries``.

     :return: the result of ``self._mark_maximum_entries`` for the collected SLIs
     :raises ValueError: if a row has fewer than 4 fields or a gene symbol is not in ``self.entrez_dict``
     """
     gene1_perturbation = SlConstants.PHARMACEUTICAL
     gene2_perturbation = 'natural (is a TSG)'
     assay = "pharmaceutical + siRNA"
     # Collects all SLIs per gene A/gene B pair; presumably _mark_maximum_entries then selects the
     # entry with the largest effect size for each pair -- confirm against that helper
     sli_dict = defaultdict(list)
     with open(self.fname) as csvfile:
         csvreader = csv.DictReader(csvfile, delimiter='\t')
         for row in csvreader:
             if len(row) < 4:
                 raise ValueError(
                     "Only got %d fields but was expecting at least 4 tab-separated fields"
                     % len(row))
             # The gene A column may contain multiple comma-separated genes
             geneA_symbols = row['geneAlist'].split(",")
             geneB_sym = self.get_current_symbol(row['geneB'])
             # Look the symbol up in entrez_dict so that an unknown gene raises instead of
             # silently yielding the id "NCBIGene:None"
             if geneB_sym not in self.entrez_dict:
                 raise ValueError(
                     "Could not find id for geneB %s in Srivasa 2016" %
                     geneB_sym)
             geneB_id = "NCBIGene:{}".format(self.entrez_dict.get(geneB_sym))
             effect = float(row['effect'].replace(",", "."))
             for symbol in geneA_symbols:
                 geneA_sym = self.get_current_symbol(symbol)
                 if geneA_sym not in self.entrez_dict:
                     raise ValueError(
                         "Could not find id for geneA %s in Srivasa 2016" %
                         geneA_sym)
                 geneA_id = "NCBIGene:{}".format(self.entrez_dict.get(geneA_sym))
                 if geneA_id == geneB_id:
                     continue  # There are a few self loops in the data, but these are not SLIs, so we skip them
                 sli = SyntheticLethalInteraction(
                     gene_A_symbol=geneA_sym,
                     gene_A_id=geneA_id,
                     gene_B_symbol=geneB_sym,
                     gene_B_id=geneB_id,
                     gene_A_pert=gene1_perturbation,
                     gene_B_pert=gene2_perturbation,
                     effect_type=SlConstants.ZSCORE,
                     effect_size=effect,
                     cell_line=SlConstants.HELA_CELL,
                     cellosaurus_id=SlConstants.HELA_CELLOSAURUS,
                     cancer_type=SlConstants.N_A,
                     ncit_id=SlConstants.N_A,
                     assay=assay,
                     pmid=self.pmid,
                     SL=True)
                 gene_pair = GenePair(geneA_sym, geneB_sym)
                 sli_dict[gene_pair].append(sli)
     sli_list = self._mark_maximum_entries(sli_dict)
     return sli_list
 def parse(self):
     """
     Parse the tab-separated file ``self.fname`` (named "Kessler 2012" in the error message) into MYC SLIs.

     Each row must have exactly three fields; the columns ``symbol`` and ``median.pair.diffs``
     provide gene B and the effect size. Rows whose symbol is unknown to ``self.entrez_dict``
     but listed as unclear are skipped. The SLIs are grouped by gene pair and handed to
     ``self._mark_maximum_entries``.

     :return: the result of ``self._mark_maximum_entries`` for the collected SLIs
     :raises ValueError: if a row does not have 3 fields or a symbol can neither be mapped nor skipped
     """
     myc = 'MYC'
     myc_curie = self.get_ncbigene_curie(myc)
     pert_a = SlConstants.OVEREXPRESSION
     pert_b = SlConstants.SI_RNA
     assay = SlConstants.RNA_INTERFERENCE_ASSAY
     effect_kind = SlConstants.LOG2_DECREASE_IN_ABUNDANCE
     not_available = SlConstants.N_A
     # Pseudogenes and divergent non-coding transcripts.
     # DIP maps to two newer symbols (as does GIF).
     skipped_symbols = {
         'ATP5EP1', 'C10orf111', 'C19ORF30', 'C3ORF51', 'CG030', 'CLEC4GP1',
         'CSN1S2A', 'DIP', 'DKFZP434I0714', 'DVL1L1', 'FLJ20674',
         'FLJ22447', 'GIF', 'HCG27', 'HMG14P', 'IGLV@', 'LDHBP', 'OR5D2P',
         'RBMXP1', 'RPL19P1'
     }
     by_pair = defaultdict(list)
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if len(record) != 3:
                 raise ValueError("Bad row with %d fields: %s" %
                                  (len(record), record))
             partner = self.get_current_symbol(record['symbol'])
             if partner not in self.entrez_dict:
                 if partner in skipped_symbols:
                     continue
                 raise ValueError(
                     "Could not find id for %s in Kessler 2012 " % partner)
             partner_curie = self.get_ncbigene_curie(partner)
             median_diffs = float(record['median.pair.diffs'])
             interaction = SyntheticLethalInteraction(
                 gene_A_symbol=myc,
                 gene_A_id=myc_curie,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=pert_a,
                 gene_B_pert=pert_b,
                 effect_type=effect_kind,
                 effect_size=median_diffs,
                 cell_line='human mammary epithelial cells',
                 cellosaurus_id=not_available,
                 cancer_type=not_available,
                 ncit_id=not_available,
                 assay=assay,
                 pmid=self.pmid,
                 SL=True)
             by_pair[GenePair(myc, partner)].append(interaction)
     return self._mark_maximum_entries(by_pair)
예제 #12
0
 def parse(self):
     """
     Parse the tab-separated file ``self.fname`` (named "Shen 2015" in the error message) into CHEK1 interactions.

     Expected columns: Z-Score, Symbol, Entrez ID, Gene Name. Every row except CHEK1 itself
     yields an interaction; it is flagged ``SL=True`` only for the genes in the fixed list below,
     all others get ``SL=False``. The interactions are grouped by gene pair and handed to
     ``self._mark_maximum_entries``.

     :return: the result of ``self._mark_maximum_entries`` for the collected interactions
     :raises ValueError: if a row has fewer than 3 fields or a symbol is not in ``self.entrez_dict``
     """
     chek1 = 'CHEK1'
     chek1_curie = 'NCBIGene:1111'
     cell_line = "HeLa-Cells"
     cellosaurus = "CVCL_0030"
     # Cancer type and NCIT id are recorded as empty strings by this parser
     cancer = ""
     ncit = ""
     # Genes that are flagged as synthetic lethal with CHEK1
     sl_genes = ["FZR1", "RAD17", "RFC1", "BLM", "CDC73", "CDC6", "WEE1"]
     by_pair = defaultdict(list)
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if len(record) < 3:
                 raise ValueError("Only got %d fields but was expecting at least 3" % len(record))
             partner = self.get_current_symbol(record['Symbol'])
             if partner == 'CHEK1':
                 continue  # Do not allow self-loops!
             if partner not in self.entrez_dict:
                 raise ValueError("Could not find id for gene symbol %s in Shen 2015" % partner)
             partner_curie = "NCBIGene:{}".format(self.entrez_dict.get(partner))
             # The file may use a decimal comma
             zscore = float(record['Z-Score'].replace(",", "."))
             is_sl = partner in sl_genes
             interaction = SyntheticLethalInteraction(
                 gene_A_symbol=chek1,
                 gene_A_id=chek1_curie,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=SlConstants.PHARMACEUTICAL,
                 gene_B_pert=SlConstants.SI_RNA,
                 effect_type=SlConstants.ZSCORE,
                 effect_size=zscore,
                 cell_line=cell_line,
                 cellosaurus_id=cellosaurus,
                 cancer_type=cancer,
                 ncit_id=ncit,
                 assay=SlConstants.RNA_INTERFERENCE_ASSAY,
                 pmid=self.pmid,
                 SL=is_sl)
             by_pair[GenePair(chek1, partner)].append(interaction)
     return self._mark_maximum_entries(by_pair)
예제 #13
0
 def parse(self):
     """
     Parse supplemental file 1 (``self.fname``, named "Han 2017" in the error messages) into SLIs.

     The column ``Drug-target.Pairs`` holds both gene symbols separated by ``__``. Both genes are
     perturbed with sgRNA; the interactions are annotated with the K562 cell line.

     :return: list of SyntheticLethalInteraction objects in file order
     :raises ValueError: if a row has fewer than 4 fields or a gene symbol is not in ``self.entrez_dict``
     """
     sg_rna = SlConstants.SG_RNA
     assay = SlConstants.RNA_INTERFERENCE_ASSAY
     # No exact value given, but authors state at least -4 for all SLIs
     effect = -4
     interactions = []
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if len(record) < 4:
                 raise ValueError(
                     "Only got %d fields but was expecting at least 4 tab-separated fields"
                     % len(record))
             # Split the pair into its two gene symbols
             pair = record['Drug-target.Pairs'].split("__")
             first = self.get_current_symbol(pair[0])
             second = self.get_current_symbol(pair[1])
             if first not in self.entrez_dict:
                 raise ValueError(
                     "could not find id for gene A (%s) in Han 2017" %
                     first)
             first_curie = "NCBIGene:{}".format(self.entrez_dict.get(first))
             if second not in self.entrez_dict:
                 raise ValueError(
                     "could not find id for gene B (%s) in Han 2017" %
                     second)
             second_curie = "NCBIGene:{}".format(self.entrez_dict.get(second))
             interactions.append(SyntheticLethalInteraction(
                 gene_A_symbol=first,
                 gene_A_id=first_curie,
                 gene_B_symbol=second,
                 gene_B_id=second_curie,
                 gene_A_pert=sg_rna,
                 gene_B_pert=sg_rna,
                 effect_type=SlConstants.ZSCORE,
                 effect_size=effect,
                 cell_line=SlConstants.K562_CELL,
                 cellosaurus_id=SlConstants.K562_CELLOSAURUS,
                 cancer_type=SlConstants.CHRONIC_MYELOGENOUS_LEUKEMIA,
                 ncit_id=SlConstants.CHRONIC_MYELOGENOUS_LEUKEMIA_NCIT,
                 assay=assay,
                 pmid=self.pmid,
                 SL=True))
     return interactions
 def parseKRAS(self):
     """
     Append the KRAS (activating mutation) SLIs read from a supplemental table to ``self.sli_list``.

     Only rows whose ``Expression`` column equals ``Expressed`` are considered, and an SLI is
     created only if the ``80% Confidence Interval (P<0.2)`` column equals 1. Symbols that are not
     in ``self.entrez_dict`` but are listed in ``self.unclear_gene_symbols`` are skipped.
     Nothing is returned.

     :raises ValueError: if a symbol can neither be mapped nor skipped
     """
     kras = 'KRAS'
     kras_curie = self.get_ncbigene_curie(kras)
     # NOTE(review): the file name mentions PTEN although gene A is KRAS -- confirm this is the intended table
     fname = 'data/vizeacoumarSuppl4-PTEN.tsv'
     not_available = SlConstants.N_A
     with open(fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if record['Expression'] != 'Expressed':
                 continue
             partner = self.get_current_symbol(record['human gene'])
             if partner not in self.entrez_dict:
                 if partner in self.unclear_gene_symbols:
                     continue
                 # NOTE(review): the wording of this message ("iid", two years) looks unintended -- confirm
                 raise ValueError(
                     "Could not find iid for %s in Brough 2018 2008 " %
                     partner)
             partner_curie = self.get_ncbigene_curie(partner)
             # Keep only the rows flagged with 1 in the 80% confidence interval column
             if int(record['80% Confidence Interval (P<0.2)']) != 1:
                 continue
             self.sli_list.append(SyntheticLethalInteraction(
                 gene_A_symbol=kras,
                 gene_A_id=kras_curie,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=SlConstants.ACTIVATING_MUTATION,
                 gene_B_pert=SlConstants.SI_RNA,
                 effect_type='confidence.80%',
                 effect_size='true',
                 cell_line='HCT 116',
                 cellosaurus_id='CVCL_0291',
                 cancer_type=not_available,
                 ncit_id=not_available,
                 assay=SlConstants.MULTICOLOR_COMPETITION_ASSAY,
                 pmid=self.pmid,
                 SL=True))
예제 #15
0
 def parse(self):
     """
     Parse the tab-separated file ``self.fname`` (named "Toyoshima 2008" in the error message) into MYC SLIs.

     Expected columns:
     Gene.Symbol, Accession.number, Z.score.greaterthan, %Viability.HFF-pB, %Viability.HFF-MYC, Ratio pBabe/Myc.
     The value of ``Z.score.greaterthan`` is stored as the effect size.

     :return: list of SyntheticLethalInteraction objects in file order
     :raises ValueError: if a row does not have 6 fields or a symbol is not in ``self.entrez_dict``
     """
     myc = 'MYC'
     # Symbols that are not current and either could not be matched
     # or match to multiple possible candidates
     skipped_symbols = {'MLCK'}
     interactions = []
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if len(record) != 6:
                 raise ValueError("Bad line with %d fields: %s" %
                                  (len(record), record))
             raw_symbol = record['Gene.Symbol']
             # The skip list is checked against the symbol as it appears in the file
             if raw_symbol in skipped_symbols:
                 continue
             partner = self.get_current_symbol(raw_symbol)
             if partner not in self.entrez_dict:
                 raise ValueError(
                     "Could not find id for symbol %s in Toyoshima 2008" %
                     partner)
             partner_curie = "NCBIGene:{}".format(self.entrez_dict.get(partner))
             zscore = float(record['Z.score.greaterthan'])
             interactions.append(SyntheticLethalInteraction(
                 gene_A_symbol=myc,
                 gene_A_id=SlConstants.MYC_GENE_ID,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=SlConstants.OVEREXPRESSION,
                 gene_B_pert=SlConstants.SI_RNA,
                 effect_type='stddev',
                 effect_size=zscore,
                 cell_line='HFF-Myc',
                 cellosaurus_id='CVCL_Y511',
                 cancer_type=SlConstants.N_A,
                 ncit_id=SlConstants.N_A,
                 assay=SlConstants.RNA_INTERFERENCE_ASSAY,
                 pmid=self.pmid,
                 SL=True))
     return interactions
예제 #16
0
 def parse(self):
     """
     Read one gene symbol per line from ``self.fname`` (named "Sun 2019" in the error message) and create VHL SLIs.

     Symbols unknown to ``self.entrez_dict`` are repaired where a unique mapping is known
     (tRNA synthetase genes get a trailing ``1``; a few renamed genes are mapped explicitly),
     skipped if they are listed as unclear, and rejected otherwise.

     :return: list of SyntheticLethalInteraction objects in file order
     :raises ValueError: if a symbol can neither be mapped nor skipped
     """
     vhl = 'VHL'
     skipped_symbols = {'QARS', 'SARS'}
     # A group of tRNA genes that need to have the '1' (these could be mapped uniquely with HGNC)
     needs_suffix = {'DARS', 'NARS', 'KARS', 'YARS'}
     # These mappings were verified as correct and unique with the HGNC website
     renamed = {
         'ORAOV1': 'LTO1',
         'VWA9': 'INTS14',
         'NARFL': 'CIAO3',
         'WBSCR22': 'BUD23',
         'UFD1L': 'UFD1'
     }
     interactions = []
     with open(self.fname) as handle:
         for line in handle:
             partner = line.strip()
             if partner not in self.entrez_dict:
                 if partner in needs_suffix:
                     partner = "%s1" % partner
                 elif partner in renamed:
                     partner = renamed[partner]
                 elif partner in skipped_symbols:
                     continue
                 else:
                     raise ValueError("Could not find id for %s in Sun 2019" %
                                      partner)
             partner_curie = self.get_ncbigene_curie(partner)
             interactions.append(SyntheticLethalInteraction(
                 gene_A_symbol=vhl,
                 gene_A_id=SlConstants.VHL_GENE_ID,
                 gene_B_symbol=partner,
                 gene_B_id=partner_curie,
                 gene_A_pert=SlConstants.LOF_MUTATION,
                 gene_B_pert=SlConstants.CRISPR_CAS9,
                 effect_type=SlConstants.N_A,
                 effect_size=SlConstants.N_A,
                 cell_line=SlConstants.A498_CELL,
                 cellosaurus_id=SlConstants.A498_CELLOSAURUS,
                 cancer_type=SlConstants.N_A,
                 ncit_id=SlConstants.N_A,
                 assay=SlConstants.CRISPR_CAS9_INTERFERENCE_ASSAY,
                 pmid=self.pmid,
                 SL=True))
     return interactions
 def parse(self):
     """
     Parse the tab-separated file ``self.fname`` (named "Blomen 2015" in the error message) into SLIs.

     Expected columns: GENE, SUMMARY, PUBMED ID, INTERACTING QUERY GENE. The last column holds a
     semicolon-separated list of query genes; one SLI (knockout of both genes, HAP1 cells) is
     created per gene / query gene pair. The SLIs are grouped by gene pair and handed to
     ``self._mark_maximum_entries`` once the whole file has been read.

     :return: the result of ``self._mark_maximum_entries`` for the collected SLIs
     :raises ValueError: if a gene symbol is not in ``self.entrez_dict``
     """
     perturbation = SlConstants.KNOCKOUT
     cellosaurus = SlConstants.HAP1_CELLOSAURUS
     assay = 'proportions.of.sense.and.antisense.insertions'
     sli_dict = defaultdict(list)
     with open(self.fname) as csvfile:
         csvreader = csv.DictReader(csvfile, delimiter='\t')
         for row in csvreader:
             geneA = self.get_current_symbol(row['GENE'])
             if geneA not in self.entrez_dict:
                 raise ValueError(
                     "[ERROR] We could not find a gene id for " + geneA)
             geneA_id = "NCBIGene:{}".format(self.entrez_dict.get(geneA))
             for query_symbol in row['INTERACTING QUERY GENE'].split(';'):
                 geneB = self.get_current_symbol(query_symbol.strip())
                 if geneB not in self.entrez_dict:
                     raise ValueError(
                         "Could not get NCBI id for gene \"%s\" in Blomen 2015"
                         % geneB)
                 geneB_id = "NCBIGene:{}".format(self.entrez_dict.get(geneB))
                 sli = SyntheticLethalInteraction(
                     gene_A_symbol=geneA,
                     gene_A_id=geneA_id,
                     gene_B_symbol=geneB,
                     gene_B_id=geneB_id,
                     gene_A_pert=perturbation,
                     gene_B_pert=perturbation,
                     effect_type=SlConstants.N_A,
                     effect_size=0,
                     cell_line=SlConstants.HAP1_CELL,
                     cellosaurus_id=cellosaurus,
                     cancer_type=SlConstants.N_A,
                     ncit_id=SlConstants.N_A,
                     assay=assay,
                     pmid=self.pmid,
                     SL=True)
                 gene_pair = GenePair(geneA, geneB)
                 sli_dict[gene_pair].append(sli)
     # Evaluate the collected entries once, after reading: this avoids re-processing the whole
     # dictionary for every pair and keeps sli_list defined when the file has no data rows
     sli_list = self._mark_maximum_entries(sli_dict)
     return sli_list
예제 #18
0
    def parse_suppl10_11(self, fname):
        """Add RB1 interactions with a penetrance of at least 80 from ``fname``.

        ``fname`` is read as a tab-separated file with the columns ``target``
        and ``Penetrance.(%)``. Each qualifying row yields one
        SyntheticLethalInteraction that is appended to ``self.sli_dict`` under
        the GenePair of RB1 and the target gene. Nothing is returned.

        Raises:
            ValueError: if a target symbol is neither in ``self.entrez_dict``
                nor in ``self.unclear_gene_symbols``.
        """
        gene_a = 'RB1'
        # Fields shared by every interaction created from this file.
        common = dict(
            gene_A_symbol=gene_a,
            gene_A_id=SlConstants.RB1_GENE_ID,
            gene_A_pert=SlConstants.LOF_MUTATION,
            gene_B_pert=SlConstants.SI_RNA,
            assay="siMEM+penetrance",
            effect_type="penetrance",
            cell_line=SlConstants.N_A,
            cellosaurus_id=SlConstants.N_A,
            cancer_type=SlConstants.N_A,
            ncit_id=SlConstants.N_A,
        )

        with open(fname) as handle:
            for record in csv.DictReader(handle, delimiter='\t'):
                symbol = self.get_current_symbol(record['target'])
                # Entries such as "PMS2,PMS2CL" name several genes, so the
                # effect cannot be assigned unambiguously to one of them.
                if ',' in symbol:
                    continue
                if symbol not in self.entrez_dict:
                    if symbol in self.unclear_gene_symbols:
                        continue
                    raise ValueError("Could not find id for %s in Brough 2018 2008 " % symbol)
                symbol_curie = self.get_ncbigene_curie(symbol)
                score = int(record['Penetrance.(%)'])
                if score < 80:
                    continue
                interaction = SyntheticLethalInteraction(
                    gene_B_symbol=symbol,
                    gene_B_id=symbol_curie,
                    effect_size=score,
                    pmid=self.pmid,
                    SL=True,
                    **common)
                pair = GenePair(gene_a, symbol)
                self.sli_dict[pair].append(interaction)
예제 #19
0
    def parse_suppl9(self):
        """Add RB1 interactions with a penetrance of at least 80 from supplement 9.

        Reads the tab-separated file ``data/brough_2012_suppl9.tsv`` (columns
        ``symbol`` and ``Penetrance.%``). Each qualifying row yields one
        SyntheticLethalInteraction that is appended to ``self.sli_dict`` under
        the GenePair of RB1 and the row's gene. Nothing is returned.

        Raises:
            ValueError: if a symbol is neither in ``self.entrez_dict`` nor in
                ``self.unclear_gene_symbols``.
        """
        source_path = 'data/brough_2012_suppl9.tsv'
        gene_a = 'RB1'
        # Fields shared by every interaction created from this file.
        common = dict(
            gene_A_symbol=gene_a,
            gene_A_id=SlConstants.RB1_GENE_ID,
            gene_A_pert=SlConstants.LOF_MUTATION,
            gene_B_pert=SlConstants.SI_RNA,
            assay="siMEM+penetrance",
            effect_type="penetrance",
            cell_line=SlConstants.N_A,
            cellosaurus_id=SlConstants.N_A,
            cancer_type=SlConstants.N_A,
            ncit_id=SlConstants.N_A,
        )

        with open(source_path) as handle:
            for record in csv.DictReader(handle, delimiter='\t'):
                symbol = self.get_current_symbol(record['symbol'])
                if symbol not in self.entrez_dict:
                    if symbol in self.unclear_gene_symbols:
                        continue
                    raise ValueError("Could not find iid for %s in Brough 2018 2008 " % symbol)
                symbol_curie = "NCBIGene:{}".format(self.entrez_dict.get(symbol))
                score = int(record['Penetrance.%'])
                if score < 80:
                    continue
                interaction = SyntheticLethalInteraction(
                    gene_B_symbol=symbol,
                    gene_B_id=symbol_curie,
                    effect_size=score,
                    pmid=self.pmid,
                    SL=True,
                    **common)
                pair = GenePair(gene_a, symbol)
                self.sli_dict[pair].append(interaction)
 def setUp(self) -> None:
     """Create the fixture: constructor keyword arguments and the interaction built from them."""
     self.parameters = dict(
         gene_A_symbol='VPS54',
         gene_A_id='NCBIGene:51542',
         gene_B_symbol='PTAR1',
         gene_B_id='NCBIGene:375743',
         gene_A_pert='pert1',
         gene_B_pert='pert2',
         effect_type='thisEff',
         effect_size='20',
         cell_line='cellLine42',
         cellosaurus_id='csID1',
         cancer_type='melanoma',
         ncit_id='ncit1234',
         assay='thisAssay',
         pmid='27453043',
         SL=True,
     )
     # Starts empty; presumably lists parameters without a getter -- verify
     # against the tests that read it.
     self.no_getter = []
     self.sli = SyntheticLethalInteraction(**self.parameters)
 def create_sli(self, geneB, SL):
     """Return a SyntheticLethalInteraction between STAG2 and ``geneB``.

     Args:
         geneB: symbol of the partner gene; its id is looked up with
             ``self.get_ncbigene_curie``.
         SL: value passed through as the interaction's ``SL`` argument.
     """
     gene_a = 'STAG2'
     gene_a_curie = self.get_ncbigene_curie(gene_a)
     gene_b_curie = self.get_ncbigene_curie(geneB)
     fields = dict(
         gene_A_symbol=gene_a,
         gene_A_id=gene_a_curie,
         gene_B_symbol=geneB,
         gene_B_id=gene_b_curie,
         gene_A_pert=SlConstants.LOF_MUTATION,
         gene_B_pert=SlConstants.SH_RNA,
         cell_line=SlConstants.H4_CELL,
         cellosaurus_id=SlConstants.H4_CELLOSAURUS,
         cancer_type=SlConstants.N_A,
         ncit_id=SlConstants.N_A,
         effect_size=SlConstants.N_A,
         effect_type=SlConstants.N_A,
         assay=SlConstants.CELL_VIABILITY_ASSAY,
         SL=SL,
         pmid=self.pmid,
     )
     return SyntheticLethalInteraction(**fields)
예제 #22
0
 def create_and_add_sli(self,
                        geneA,
                        geneB,
                        geneApert,
                        geneBpert,
                        assay,
                        pmid,
                        cell=SlConstants.N_A,
                        cellosaurus=SlConstants.N_A,
                        cancer=SlConstants.N_A,
                        ncit=SlConstants.N_A,
                        effecttype=SlConstants.N_A,
                        effectsize=SlConstants.N_A,
                        background_dependency_status=SlConstants.N_A,
                        background_dependency_gene_symbol=SlConstants.N_A,
                        background_dependency_gene_id=SlConstants.N_A,
                        sl=True):
     """Create a SyntheticLethalInteraction and append it to ``self.entries``.

     The ids of ``geneA`` and ``geneB`` are looked up with
     ``self.get_ncbigene_curie``; all other arguments are forwarded unchanged
     to the SyntheticLethalInteraction constructor. Nothing is returned.
     """
     fields = {
         'gene_A_symbol': geneA,
         'gene_A_id': self.get_ncbigene_curie(geneA),
         'gene_B_symbol': geneB,
         'gene_B_id': self.get_ncbigene_curie(geneB),
         'gene_A_pert': geneApert,
         'gene_B_pert': geneBpert,
         'effect_type': effecttype,
         'effect_size': effectsize,
         'cell_line': cell,
         'cellosaurus_id': cellosaurus,
         'cancer_type': cancer,
         'ncit_id': ncit,
         'assay': assay,
         'background_dependency_status': background_dependency_status,
         'background_dependency_gene_symbol': background_dependency_gene_symbol,
         'background_dependency_gene_id': background_dependency_gene_id,
         'pmid': pmid,
         'SL': sl,
     }
     self.entries.append(SyntheticLethalInteraction(**fields))
예제 #23
0
    def parse(self):
        """
        Build one interaction between ATR and each gene of a fixed gene set.

        The set holds the overlapping identified ATRi co-essential genes
        (significant in all three screens). RNASEH2 was validated in detail.
        Note, we replaced C17orf53 by HROB (gene id: 78995) and
        MGEA5 by OGA (Gene ID: 10724).

        Returns:
            list: one SyntheticLethalInteraction per gene, in sorted symbol order.

        Raises:
            ValueError: if a gene symbol is not a key of ``self.entrez_dict``.
        """

        sig_genes = {
            'RNASEH2B', 'RNASEH2A', "DSCC1", "TMEM208", "POLE3", "POLE4",
            "LEO1", "CNOT1", "SETD1A", "HROB", "OGA", "MCM9", "USP37",
            "THRAP3", "DPYS", "CKS2", "RHNO1", "HUS1"
        }
        sli_list = []
        atr = 'ATR'
        atr_id = self.get_ncbigene_curie(atr)
        # Iterate in sorted order: plain set iteration follows string hashing,
        # which differs between interpreter runs.
        for geneB in sorted(sig_genes):
            if geneB not in self.entrez_dict:
                # Format into a single string; passing the symbol as a second
                # argument made the message render as a tuple.
                raise ValueError("Could not find id for %s" % geneB)
            geneb_id = self.get_ncbigene_curie(geneB)
            sli = SyntheticLethalInteraction(
                gene_A_symbol=atr,
                gene_A_id=atr_id,
                gene_B_symbol=geneB,
                gene_B_id=geneb_id,
                gene_A_pert=SlConstants.PHARMACEUTICAL,
                gene_B_pert=SlConstants.CRISPR_CAS9,
                effect_type=SlConstants.N_A,
                effect_size=SlConstants.N_A,
                cell_line=SlConstants.N_A,
                cellosaurus_id=SlConstants.N_A,
                cancer_type=SlConstants.N_A,
                ncit_id=SlConstants.N_A,
                assay=SlConstants.CRISPR_CAS9_INTERFERENCE_ASSAY,
                pmid=self.pmid,
                SL=True)
            sli_list.append(sli)
        return sli_list
예제 #24
0
 def _add_negatives(self):
     """Append non-SL (``SL=False``) TOP1 interactions to ``self.sli_list``.

     Reads the tab-separated file ``self.fname`` (columns used: ``Symbol``
     and ``RSA.p-value``). Rows with a p-value below 0.5 are skipped, as are
     symbols that are not in ``self.entrez_dict`` and the row whose gene id
     equals that of TOP1. Nothing is returned.

     Raises:
         ValueError: if a row does not have exactly five fields, or if the
             p-value cannot be converted to float.
     """
     top1 = 'TOP1'
     top1_id = self.get_ncbigene_curie(top1)
     expected_fields = 5
     # header: Symbol, Gene_ID, Rank, RSA p-value, FDR
     with open(self.fname) as csvfile:
         csvreader = csv.DictReader(csvfile, delimiter='\t')
         for row in csvreader:
             if len(row) != expected_fields:
                 # The message reports the same count the check uses.
                 raise ValueError(
                     "Bad line with %d instead of %d fields: %s" %
                     (len(row), expected_fields, row))
             geneB = row['Symbol']
             pval = float(row['RSA.p-value'])
             if pval < 0.5:
                 continue
             sym = self.get_current_symbol(geneB)
             if sym in self.entrez_dict:
                 # We skip symbols that cannot be identified for this negative list
                 geneB_id = self.get_ncbigene_curie(sym)
                 if top1_id == geneB_id:
                     # There is one self-loop in the data, we discard it
                     # because self-loops cannot be SLIs
                     continue
                 sli = SyntheticLethalInteraction(
                     gene_A_symbol=top1,
                     gene_A_id=top1_id,
                     gene_B_symbol=sym,
                     gene_B_id=geneB_id,
                     gene_A_pert=SlConstants.PHARMACEUTICAL,
                     gene_B_pert=SlConstants.SI_RNA,
                     effect_type=SlConstants.PVAL,
                     effect_size=pval,
                     cell_line=SlConstants.MDAMB231_CELL,
                     cellosaurus_id=SlConstants.MDAMB231_CELLOSAURUS,
                     cancer_type=SlConstants.N_A,
                     ncit_id=SlConstants.N_A,
                     assay=SlConstants.CELL_VIABILITY_ASSAY,
                     pmid=self.pmid,
                     SL=False)
                 self.sli_list.append(sli)
 def parseLoF(self, geneA, fname):
     """Append loss-of-function interactions of ``geneA`` from ``fname`` to ``self.sli_list``.

     ``fname`` is read as a tab-separated file. Only rows whose ``Expression``
     column equals ``Expressed`` and whose
     ``80% Confidence Interval (P<0.2)`` column equals 1 produce an
     interaction. Nothing is returned.

     Args:
         geneA: symbol of the first gene (per the original note: BLM, MUS81,
             PTEN, PTTG1).
         fname: path of the tab-separated input file.

     Raises:
         ValueError: if a symbol is neither in ``self.entrez_dict`` nor in
             ``self.unclear_gene_symbols``.
     """
     gene_a_curie = self.get_ncbigene_curie(geneA)
     with open(fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             if record['Expression'] != 'Expressed':
                 continue
             symbol = self.get_current_symbol(record['human gene'])
             if symbol not in self.entrez_dict:
                 if symbol in self.unclear_gene_symbols:
                     continue
                 raise ValueError(
                     "Could not find iid for %s in Brough 2018 2008 " %
                     symbol)
             symbol_curie = self.get_ncbigene_curie(symbol)
             if int(record['80% Confidence Interval (P<0.2)']) != 1:
                 continue
             self.sli_list.append(
                 SyntheticLethalInteraction(
                     gene_A_symbol=geneA,
                     gene_A_id=gene_a_curie,
                     gene_B_symbol=symbol,
                     gene_B_id=symbol_curie,
                     gene_A_pert=SlConstants.LOF_MUTATION,
                     gene_B_pert=SlConstants.SI_RNA,
                     effect_type='confidence.80%',
                     effect_size='true',
                     cell_line=SlConstants.HCT_116,
                     cellosaurus_id=SlConstants.HCT_116_CELLOSAURUS,
                     cancer_type=SlConstants.N_A,
                     ncit_id=SlConstants.N_A,
                     assay=SlConstants.MULTICOLOR_COMPETITION_ASSAY,
                     pmid=self.pmid,
                     SL=True))
예제 #26
0
 def get_sli(self, geneA_sym, geneA_id, geneB_sym, geneB_id):
     """Return an acute-myeloid-leukemia interaction for the given gene pair.

     Per the original note, gene A should be either NRAS or KRAS; these genes
     had activating mutations in the cell lines. Gene A is recorded with an
     activating-mutation perturbation and gene B with an sgRNA perturbation.
     """
     fields = dict(
         gene_A_symbol=geneA_sym,
         gene_A_id=geneA_id,
         gene_B_symbol=geneB_sym,
         gene_B_id=geneB_id,
         # NOTE(review): species id is hard-coded -- confirm it matches the
         # organism of the screen.
         species_id="10090",
         gene_A_pert=SlConstants.ACTIVATING_MUTATION,
         gene_B_pert=SlConstants.SG_RNA,
         effect_type="n/a",
         effect_size="n/a",
         cell_line="n/a",
         cellosaurus_id="n/a",
         cancer_type="Acute Myeloid Leukemia",
         ncit_id="NCIT:C3171",
         assay=SlConstants.CRISPR_CAS9_INTERFERENCE_ASSAY,
         pmid=self.pmid,
         SL=True,
     )
     return SyntheticLethalInteraction(**fields)
 def get_sli(self, geneA_sym, geneA_id, geneB_sym, geneB_id):
     """Return an interaction in which both genes carry an siRNA perturbation.

     Cell line, cancer type, effect type and effect size are all recorded as
     ``SlConstants.N_A``.

     NOTE(review): the original comment said gene A should be NRAS or KRAS
     with activating mutations, which does not match the siRNA perturbation
     used here -- confirm which is intended.
     """
     not_available = SlConstants.N_A
     fields = dict(
         gene_A_symbol=geneA_sym,
         gene_A_id=geneA_id,
         gene_B_symbol=geneB_sym,
         gene_B_id=geneB_id,
         # NOTE(review): species id is hard-coded -- confirm it matches the
         # organism of the screen.
         species_id="10090",
         gene_A_pert=SlConstants.SI_RNA,
         gene_B_pert=SlConstants.SI_RNA,
         effect_type=SlConstants.N_A,
         effect_size=SlConstants.N_A,
         cell_line=SlConstants.N_A,
         cellosaurus_id=SlConstants.N_A,
         cancer_type=not_available,
         ncit_id=not_available,
         assay=SlConstants.MULTICOLOR_COMPETITION_ASSAY,
         pmid=self.pmid,
         SL=True,
     )
     return SyntheticLethalInteraction(**fields)
예제 #28
0
 def get_sli(self, geneB_sym, pmid):
     """Return a melanoma interaction between BRAF and ``geneB_sym``.

     BRAF is recorded with an activating-mutation perturbation and the
     partner gene with an shRNA perturbation.

     Args:
         geneB_sym: symbol of the partner gene; its id is looked up with
             ``self.get_ncbigene_curie``.
         pmid: value passed through as the interaction's ``pmid`` argument.
     """
     gene_a = 'BRAF'
     gene_a_id = SlConstants.BRAF_GENE_ID
     partner_id = self.get_ncbigene_curie(geneB_sym)
     fields = dict(
         gene_A_symbol=gene_a,
         gene_A_id=gene_a_id,
         gene_B_symbol=geneB_sym,
         gene_B_id=partner_id,
         # NOTE(review): species id is hard-coded -- confirm it matches the
         # organism of the cell line.
         species_id="10090",
         gene_A_pert=SlConstants.ACTIVATING_MUTATION,
         gene_B_pert=SlConstants.SH_RNA,
         effect_type=SlConstants.N_A,
         effect_size=SlConstants.N_A,
         cell_line=SlConstants.A375_CELL,
         cellosaurus_id=SlConstants.A375_CELLOSAURUS,
         cancer_type=SlConstants.MELANOMA,
         ncit_id=SlConstants.MELANOMA_NCIT,
         assay=SlConstants.CELL_VIABILITY_ASSAY,
         pmid=pmid,
         SL=True,
     )
     return SyntheticLethalInteraction(**fields)
예제 #29
0
    def get_sli(self, geneB_sym, geneB_id, pval, slstatus):
        """Return a clear-cell renal cell carcinoma interaction between VHL and a partner gene.

        VHL is recorded with a loss-of-function mutation and the partner gene
        with an shRNA perturbation.

        Args:
            geneB_sym: symbol of the partner gene.
            geneB_id: id of the partner gene.
            pval: stored as the effect size (effect type is ``SlConstants.PVAL``).
            slstatus: value passed through as the interaction's ``SL`` argument.
        """
        gene_a = 'VHL'
        gene_a_id = SlConstants.VHL_GENE_ID

        fields = dict(
            gene_A_symbol=gene_a,
            gene_A_id=gene_a_id,
            gene_B_symbol=geneB_sym,
            gene_B_id=geneB_id,
            # NOTE(review): species id is hard-coded -- confirm it matches the
            # organism of the cell line.
            species_id="10090",
            gene_A_pert=SlConstants.LOF_MUTATION,
            gene_B_pert=SlConstants.SH_RNA,
            effect_type=SlConstants.PVAL,
            effect_size=pval,
            cell_line=SlConstants.A498_CELL,
            cellosaurus_id=SlConstants.A498_CELLOSAURUS,
            cancer_type=SlConstants.CLEAR_CELL_RENAL_CELL_CARCINOMA,
            ncit_id=SlConstants.CLEAR_CELL_RENAL_CELL_CARCINOMA_NCIT,
            assay=SlConstants.SG_RNA_DEPLETION_ASSAY,
            pmid=self.pmid,
            SL=slstatus,
        )
        return SyntheticLethalInteraction(**fields)
예제 #30
0
 def parse(self):
     """Build ATR interactions from the tab-separated file ``self.fname``.

     Columns read: ``symbol``, ``MCF12A.Z-score`` and ``HCC1143.Z-score``.
     The effect size of each interaction is the mean of the two z-scores.
     The symbol C9ORF96 is replaced by STKLD1, and the row for ATR itself is
     skipped because a self-loop cannot be an SLI.

     Returns:
         list: one SyntheticLethalInteraction per retained row.
     """
     atr_symbol = 'ATR'
     interactions = []
     atr_curie = self.get_ncbigene_curie(atr_symbol)
     with open(self.fname) as handle:
         for record in csv.DictReader(handle, delimiter='\t'):
             symbol = self.get_current_symbol(record['symbol'])
             if symbol == 'C9ORF96':
                 symbol = 'STKLD1'
             symbol_curie = self.get_ncbigene_curie(symbol)
             z_mcf12a = float(record['MCF12A.Z-score'])
             z_hcc1143 = float(record['HCC1143.Z-score'])
             # There is one self-loop in the dataset; it is dropped only after
             # its id lookup and z-score parsing, as before.
             if symbol == atr_symbol:
                 continue
             mean_z = 0.5 * (z_mcf12a + z_hcc1143)
             interactions.append(
                 SyntheticLethalInteraction(
                     gene_A_symbol=atr_symbol,
                     gene_A_id=atr_curie,
                     gene_B_symbol=symbol,
                     gene_B_id=symbol_curie,
                     gene_A_pert=SlConstants.PHARMACEUTICAL,
                     gene_B_pert=SlConstants.SI_RNA,
                     effect_type=SlConstants.ZSCORE,
                     effect_size=mean_z,
                     cell_line=SlConstants.HCC1143_CELL,
                     cellosaurus_id=SlConstants.HCC1143_CELLOSAURUS,
                     cancer_type=SlConstants.N_A,
                     ncit_id=SlConstants.N_A,
                     assay=SlConstants.CELL_VIABILITY_ASSAY,
                     pmid=self.pmid,
                     SL=True))
     return interactions