Esempio n. 1
0
 def __scan_sfs(self):
     '''Walk the loci of a text sfs file and dispatch each valid one.

     At most ``self.limit`` entries of ``self.file.data`` are visited. Each
     entry gets an empty haplotype pool and a ``'missing'`` field: ``None``
     without a chip file, otherwise one flag per position that is True when
     the position is absent from the chip data. Entries whose chip data is
     unavailable, or whose chip variant count is rejected by ``is_within``,
     are skipped. Entries whose own variant count passes ``is_within`` are
     either run through ``Calculator`` right away (result appended to
     ``self.result``) or appended to ``self.data_buffer`` when a buffer exists.
     '''
     for idx, locus in enumerate(self.file.data):
         if idx >= self.limit:
             break
         # text sfs file does not have any haplotype pools
         locus['pool'] = [[]]
         if not self.chip_file:
             locus['missing'] = None
         else:
             chip = self.chip_file.getdata(locus['name'])
             if chip is None or not is_within(chip['num_variants'],
                                              self.args.def_valid_locus):
                 # skipped loci do not touch the progress bar
                 continue
             locus['missing'] = [
                 site not in chip['pos'] for site in locus['pos']
             ]
             assert len(locus['missing']) == len(locus['maf'])
         usable = is_within(locus['num_variants'], self.args.def_valid_locus)
         if usable and self.data_buffer is None:
             # no buffer: compute immediately and keep the result
             self.result.append(
                 Calculator(self.args, self.unknown_args, locus).run())
         elif usable:
             self.data_buffer.append(locus)
         self.pbar.update(idx + 1)
Esempio n. 2
0
 def __scan_gdat(self):
     '''Scan a gdat file and dispatch every valid group.

     Column names default to 'maf', 'position' and 'annotation'. An optional
     two-column key file (``self.args.data`` with its last five characters
     replaced by '.key') may rename them.

     For each group in ``self.groups`` (at most ``self.limit`` of them) the
     group data is loaded and turned into a locus dictionary with 'pool',
     'name', 'maf', 'pos', 'function_score', 'num_variants' and 'missing'.
     Groups with an unknown column are logged and skipped. When a chip file
     is present, groups without usable chip data are skipped as well. A locus
     whose variant count passes ``is_within`` is run through ``Calculator``
     (result appended to ``self.result``) or, if ``self.data_buffer`` exists,
     appended to that buffer.
     '''
     maf = 'maf'
     pos = 'position'
     function_score = 'annotation'
     # Allow for customized key names in gdat file
     try:
         key_file = self.args.data[:-5] + '.key'
         for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
             if x == 'maf':
                 maf = y
             elif x == 'position':
                 pos = y
             elif x == 'annotation':
                 function_score = y
     except Exception:
         # The key file is optional, so any failure to read it keeps the
         # default names. Unlike the former bare ``except`` this no longer
         # swallows KeyboardInterrupt / SystemExit.
         pass
     #
     for group, item in enumerate(self.groups):
         if group >= self.limit:
             break
         data = self.file.getdata(item)
         if self.args.resampling:
             data.decompress()
         else:
             # haplotypes are only needed for resampling
             data['haplotype'] = [[]]
         try:
             loci_input = {
                 'pool': data['haplotype'],
                 'name': item,
                 'maf': list(data[maf]),
                 'pos': list(data[pos]),
                 'function_score': list(data[function_score])
             }
         except KeyError as e:
             env.logger.error(
                 'Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.'
                 .format(e, self.args.data[:-5]))
             continue
         loci_input['num_variants'] = len(loci_input['maf'])
         if self.chip_file:
             cdata = self.chip_file.getdata(item)
             if cdata is None or (not is_within(cdata['num_variants'],
                                                self.args.def_valid_locus)):
                 continue
             # True marks a position that is absent from the chip data
             loci_input['missing'] = [
                 x not in cdata['pos'] for x in loci_input['pos']
             ]
         else:
             loci_input['missing'] = None
         if is_within(loci_input['num_variants'],
                      self.args.def_valid_locus):
             if self.data_buffer is None:
                 self.result.append(
                     Calculator(self.args, self.unknown_args,
                                loci_input).run())
             else:
                 self.data_buffer.append(loci_input)
         self.pbar.update(group + 1)
Esempio n. 3
0
 def __scan_gdat(self):
     """Scan a gdat file and dispatch every valid group.

     Column names default to "maf", "position" and "annotation". An optional
     two-column key file (``self.args.data`` with its last five characters
     replaced by ".key") may rename them.

     For each group in ``self.groups`` (at most ``self.limit`` of them) the
     group data is loaded and turned into a locus dictionary with "pool",
     "name", "maf", "pos", "function_score", "num_variants" and "missing".
     Groups with an unknown column are logged and skipped. When a chip file
     is present, groups without usable chip data are skipped as well. A locus
     whose variant count passes ``is_within`` is run through ``Calculator``
     (result appended to ``self.result``) or, if ``self.data_buffer`` exists,
     appended to that buffer.
     """
     maf = "maf"
     pos = "position"
     function_score = "annotation"
     # Allow for customized key names in gdat file
     try:
         key_file = self.args.data[:-5] + ".key"
         for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
             if x == "maf":
                 maf = y
             elif x == "position":
                 pos = y
             elif x == "annotation":
                 function_score = y
     except Exception:
         # The key file is optional, so any failure to read it keeps the
         # default names. Unlike the former bare ``except`` this no longer
         # swallows KeyboardInterrupt / SystemExit.
         pass
     #
     for group, item in enumerate(self.groups):
         if group >= self.limit:
             break
         data = self.file.getdata(item)
         if self.args.resampling:
             data.decompress()
         else:
             # haplotypes are only needed for resampling
             data["haplotype"] = [[]]
         try:
             loci_input = {
                 "pool": data["haplotype"],
                 "name": item,
                 "maf": list(data[maf]),
                 "pos": list(data[pos]),
                 "function_score": list(data[function_score]),
             }
         except KeyError as e:
             env.logger.error(
                 "Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.".format(
                     e, self.args.data[:-5]
                 )
             )
             continue
         loci_input["num_variants"] = len(loci_input["maf"])
         if self.chip_file:
             cdata = self.chip_file.getdata(item)
             if cdata is None or (not is_within(cdata["num_variants"], self.args.def_valid_locus)):
                 continue
             # True marks a position that is absent from the chip data
             loci_input["missing"] = [x not in cdata["pos"] for x in loci_input["pos"]]
         else:
             loci_input["missing"] = None
         if is_within(loci_input["num_variants"], self.args.def_valid_locus):
             if self.data_buffer is None:
                 self.result.append(Calculator(self.args, self.unknown_args, loci_input).run())
             else:
                 self.data_buffer.append(loci_input)
         self.pbar.update(group + 1)
Esempio n. 4
0
 def __scan_gdat(self):
     '''Scan a gdat file and dispatch every valid group.

     Column names default to 'maf', 'position' and 'annotation'. An optional
     two-column key file (``self.args.data`` with its last five characters
     replaced by '.key') may rename them.

     For each group in ``self.groups`` (at most ``self.limit`` of them) the
     group data is loaded and turned into a locus dictionary with 'pool',
     'name', 'maf', 'pos', 'function_score', 'num_variants' and 'missing'.
     Groups with an unknown column are logged and skipped. When a chip file
     is present, groups without usable chip data are skipped as well. A locus
     whose variant count passes ``is_within`` is run through ``Calculator``
     (result appended to ``self.result``) or, if ``self.data_buffer`` exists,
     appended to that buffer.
     '''
     maf = 'maf'
     pos = 'position'
     function_score = 'annotation'
     # Allow for customized key names in gdat file
     try:
         key_file = self.args.data[:-5] + '.key'
         for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
             if x == 'maf':
                 maf = y
             elif x == 'position':
                 pos = y
             elif x == 'annotation':
                 function_score = y
     except Exception:
         # The key file is optional, so any failure to read it keeps the
         # default names. Unlike the former bare ``except`` this no longer
         # swallows KeyboardInterrupt / SystemExit.
         pass
     #
     for group, item in enumerate(self.groups):
         if group >= self.limit:
             break
         data = self.file.getdata(item)
         if self.args.resampling:
             data.decompress()
         else:
             # haplotypes are only needed for resampling
             data['haplotype'] = [[]]
         try:
             loci_input = {'pool': data['haplotype'], 'name': item,
                           'maf': list(data[maf]), 'pos': list(data[pos]),
                           'function_score': list(data[function_score])}
         except KeyError as e:
             env.logger.error(
                 'Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.'
                 .format(e, self.args.data[:-5]))
             continue
         loci_input['num_variants'] = len(loci_input['maf'])
         if self.chip_file:
             cdata = self.chip_file.getdata(item)
             if cdata is None or (not is_within(cdata['num_variants'], self.args.def_valid_locus)):
                 continue
             # True marks a position that is absent from the chip data
             loci_input['missing'] = [x not in cdata['pos'] for x in loci_input['pos']]
         else:
             loci_input['missing'] = None
         if is_within(loci_input['num_variants'], self.args.def_valid_locus):
             if self.data_buffer is None:
                 self.result.append(Calculator(self.args, self.unknown_args, loci_input).run())
             else:
                 self.data_buffer.append(loci_input)
         self.pbar.update(group + 1)
Esempio n. 5
0
 def __update_direction(self):
     '''Build the list of effect-direction labels for the variants.

     Every variant starts as 'd'. When ``self.args.def_protective`` is set,
     the labels are rebuilt from ``self.data['function_score']``: 'p' where
     ``is_within`` accepts the score for ``def_protective``, 'd' otherwise.
     Finally, every entry paired with an 's' in
     ``self.data['function_class']`` is relabelled 'n'.

     Returns:
         list of 'd' / 'p' / 'n' labels, one per (label, function class)
         pair produced by ``zip``.
     '''
     d = ['d'] * self.data['num_variants']
     if self.args.def_protective is not None:
         d = ['p' if is_within(y, self.args.def_protective) else 'd' for y in self.data['function_score']]
     # adjust wrt function for neutral sites
     d = ['n' if y == 's' else x for x, y in zip(d, self.data['function_class'])]
     return d
Esempio n. 6
0
 def __update_function(self):
     '''Return the function-class label of every variant.

     Without ``self.args.def_neutral`` all ``self.data['num_variants']``
     variants are labelled 'ns'. Otherwise each entry of
     ``self.data['function_score']`` is labelled 's' when ``is_within``
     accepts it for ``def_neutral`` and 'ns' when it does not.
     '''
     labels = ['ns'] * self.data['num_variants']
     if self.args.def_neutral is None:
         return labels
     labels = []
     for score in self.data['function_score']:
         if is_within(score, self.args.def_neutral):
             labels.append('s')
         else:
             labels.append('ns')
     return labels
Esempio n. 7
0
 def __update_direction(self):
     """Build the list of effect-direction labels for the variants.

     Every variant starts as "d". When ``self.args.def_protective`` is set,
     the labels are rebuilt from ``self.data["function_score"]``: "p" where
     ``is_within`` accepts the score for ``def_protective``, "d" otherwise.
     Finally, every entry paired with an "s" in
     ``self.data["function_class"]`` is relabelled "n".

     Returns:
         list of "d" / "p" / "n" labels, one per (label, function class)
         pair produced by ``zip``.
     """
     d = ["d"] * self.data["num_variants"]
     if self.args.def_protective is not None:
         d = ["p" if is_within(y, self.args.def_protective) else "d" for y in self.data["function_score"]]
     # adjust wrt function for neutral sites
     d = ["n" if y == "s" else x for x, y in zip(d, self.data["function_class"])]
     return d
Esempio n. 8
0
 def __scan_sfs(self):
     '''Iterate over the entries of a text sfs file and process valid loci.

     Stops after ``self.limit`` entries of ``self.file.data``. Every visited
     entry receives an empty haplotype pool. With a chip file, entries lacking
     chip data (or whose chip variant count fails ``is_within``) are skipped,
     and the rest get a ``'missing'`` list flagging positions that are not in
     the chip data; without one, ``'missing'`` is ``None``. Entries whose
     variant count passes ``is_within`` go to ``self.data_buffer`` when it
     exists, otherwise through ``Calculator`` into ``self.result``.
     '''
     for counter, entry in enumerate(self.file.data):
         if counter >= self.limit:
             break
         # text sfs file does not have any haplotype pools
         entry['pool'] = [[]]
         if self.chip_file:
             chip_entry = self.chip_file.getdata(entry['name'])
             if chip_entry is None:
                 continue
             if not is_within(chip_entry['num_variants'], self.args.def_valid_locus):
                 continue
             entry['missing'] = [p not in chip_entry['pos'] for p in entry['pos']]
             assert len(entry['missing']) == len(entry['maf'])
         else:
             entry['missing'] = None
         if is_within(entry['num_variants'], self.args.def_valid_locus):
             if self.data_buffer is not None:
                 self.data_buffer.append(entry)
             else:
                 # no buffer configured: evaluate the locus right away
                 self.result.append(Calculator(self.args, self.unknown_args, entry).run())
         self.pbar.update(counter + 1)
Esempio n. 9
0
 def __update_direction_random(self):
     '''Randomly neutralise entries of a copy of self.swap['direction'].

     With ``self.args.proportion_detrimental`` set, a 'd' entry becomes 'n'
     when its ``rng.random()`` draw is below ``1 - proportion_detrimental``
     and ``is_within`` rejects its function score for
     ``self.args.def_disruptive``. With ``self.args.proportion_protective``
     set, a 'p' entry becomes 'n' when its draw is below
     ``1 - proportion_protective``. The stored direction list is not modified.
     '''
     labels = copy.deepcopy(self.swap['direction'])
     if self.args.proportion_detrimental is not None:
         # mark some as neutral
         threshold = 1 - self.args.proportion_detrimental
         relabelled = []
         for label, score in zip(labels, self.data['function_score']):
             # the draw comes first so one random number is consumed per entry
             if (rng.random() < threshold and label == 'd'
                     and not is_within(score, self.args.def_disruptive)):
                 relabelled.append('n')
             else:
                 relabelled.append(label)
         labels = relabelled
     if self.args.proportion_protective is not None:
         # mark some as neutral
         threshold = 1 - self.args.proportion_protective
         relabelled = []
         for label in labels:
             if rng.random() < threshold and label == 'p':
                 relabelled.append('n')
             else:
                 relabelled.append(label)
         labels = relabelled
     return labels
Esempio n. 10
0
 def __update_direction(self):
     '''Build the list of effect-direction labels for the variants.

     Every variant starts as 'd'. When ``self.args.def_protective`` is set,
     the labels are rebuilt from ``self.data['function_score']``: 'p' where
     ``is_within`` accepts the score for ``def_protective``, 'd' otherwise.
     Finally, every entry paired with an 's' in
     ``self.data['function_class']`` is relabelled 'n'.

     Returns:
         list of 'd' / 'p' / 'n' labels, one per (label, function class)
         pair produced by ``zip``.
     '''
     d = ['d'] * self.data['num_variants']
     if self.args.def_protective is not None:
         d = [
             'p' if is_within(y, self.args.def_protective) else 'd'
             for y in self.data['function_score']
         ]
     # adjust wrt function for neutral sites
     d = [
         'n' if y == 's' else x
         for x, y in zip(d, self.data['function_class'])
     ]
     return d
Esempio n. 11
0
 def __update_direction_random(self):
     """Return a randomly neutralised copy of self.swap["direction"].

     When ``self.args.proportion_detrimental`` is set, each "d" entry turns
     into "n" if its ``rng.random()`` draw is below
     ``1 - proportion_detrimental`` and ``is_within`` rejects its function
     score for ``self.args.def_disruptive``. When
     ``self.args.proportion_protective`` is set, each "p" entry turns into
     "n" if its draw is below ``1 - proportion_protective``.
     """
     result = copy.deepcopy(self.swap["direction"])
     if self.args.proportion_detrimental is not None:
         # mark some as neutral
         cutoff = 1 - self.args.proportion_detrimental
         scored = zip(result, self.data["function_score"])
         # the draw is evaluated first, so every entry consumes one random number
         result = [
             label
             if not (rng.random() < cutoff and label == "d" and not is_within(score, self.args.def_disruptive))
             else "n"
             for label, score in scored
         ]
     if self.args.proportion_protective is not None:
         # mark some as neutral
         cutoff = 1 - self.args.proportion_protective
         result = [label if not (rng.random() < cutoff and label == "p") else "n" for label in result]
     return result
Esempio n. 12
0
 def __update_direction_random(self):
     '''Derive randomised direction labels from self.swap['direction'].

     Works on a deep copy of the stored labels. If
     ``self.args.proportion_detrimental`` is given, 'd' labels are replaced
     by 'n' whenever the ``rng.random()`` draw for that entry is below
     ``1 - proportion_detrimental`` and ``is_within`` rejects the entry's
     function score for ``self.args.def_disruptive``. If
     ``self.args.proportion_protective`` is given, 'p' labels are replaced by
     'n' whenever the draw is below ``1 - proportion_protective``.
     '''
     current = copy.deepcopy(self.swap['direction'])
     if self.args.proportion_detrimental is not None:
         # mark some as neutral
         prob = 1 - self.args.proportion_detrimental
         pending, current = current, []
         for tag, score in zip(pending, self.data['function_score']):
             # keep the draw first: one random number per entry, eligible or not
             neutralise = (rng.random() < prob and tag == 'd'
                           and not is_within(score, self.args.def_disruptive))
             current.append('n' if neutralise else tag)
     if self.args.proportion_protective is not None:
         # mark some as neutral
         prob = 1 - self.args.proportion_protective
         pending, current = current, []
         for tag in pending:
             neutralise = rng.random() < prob and tag == 'p'
             current.append('n' if neutralise else tag)
     return current
Esempio n. 13
0
 def __update_function(self):
     '''Label each variant 's' or 'ns'.

     The default is 'ns' for all ``self.data['num_variants']`` variants. When
     ``self.args.def_neutral`` is set, the labels are instead derived from
     ``self.data['function_score']``: 's' where ``is_within`` accepts the
     score for ``def_neutral``, 'ns' elsewhere.
     '''
     default_labels = ['ns'] * self.data['num_variants']
     if self.args.def_neutral is None:
         return default_labels
     return [
         'ns' if not is_within(score, self.args.def_neutral) else 's'
         for score in self.data['function_score']
     ]
Esempio n. 14
0
 def __update_function(self):
     """Compute the function-class labels ("s" / "ns") of the variants.

     All ``self.data["num_variants"]`` variants are "ns" unless
     ``self.args.def_neutral`` is set. In that case one label is produced per
     entry of ``self.data["function_score"]``, "s" where ``is_within``
     accepts the score for ``def_neutral`` and "ns" otherwise.
     """
     classes = ["ns"] * self.data["num_variants"]
     if self.args.def_neutral is not None:
         classes = []
         for value in self.data["function_score"]:
             neutral = is_within(value, self.args.def_neutral)
             classes.append("s" if neutral else "ns")
     return classes