def __scan_sfs(self):
    '''Walk the loci of a text SFS file and dispatch each eligible one.

    Iterates ``self.file.data`` until ``self.limit`` loci have been visited.
    Every locus dict receives an empty haplotype pool and a ``'missing'``
    entry: ``None`` when no chip file is configured, otherwise one boolean per
    position that is ``True`` when the position is absent from the chip data.
    A locus whose ``'num_variants'`` passes
    ``is_within(..., self.args.def_valid_locus)`` is either run through
    ``Calculator`` right away (result appended to ``self.result``) or queued
    on ``self.data_buffer`` when a buffer exists.
    '''
    for index, locus in enumerate(self.file.data):
        if index >= self.limit:
            break
        # text sfs file does not have any haplotype pools
        locus['pool'] = [[]]
        if not self.chip_file:
            locus['missing'] = None
        else:
            chip = self.chip_file.getdata(locus['name'])
            # Skip loci that are unknown to the chip file ...
            if chip is None:
                continue
            # ... or whose chip variant count is outside the valid range.
            if not is_within(chip['num_variants'], self.args.def_valid_locus):
                continue
            locus['missing'] = [site not in chip['pos'] for site in locus['pos']]
            assert len(locus['missing']) == len(locus['maf'])
        if is_within(locus['num_variants'], self.args.def_valid_locus):
            if self.data_buffer is not None:
                self.data_buffer.append(locus)
            else:
                outcome = Calculator(self.args, self.unknown_args, locus).run()
                self.result.append(outcome)
        # Loci skipped via ``continue`` above do not reach this update.
        self.pbar.update(index + 1)
def __scan_gdat(self):
    '''Scan a gdat file and dispatch each eligible locus group.

    Column names default to ``maf``, ``position`` and ``annotation``.  An
    optional key file (the data path minus its last five characters, plus
    ``.key``) may override them: its first column holds the default name and
    its second column the name actually used in the gdat file.

    For every group in ``self.groups`` (up to ``self.limit``) a locus input
    dict is assembled, annotated with chip-based missingness when a chip file
    is configured, and -- when its variant count passes
    ``is_within(..., self.args.def_valid_locus)`` -- either run through
    ``Calculator`` (result appended to ``self.result``) or queued on
    ``self.data_buffer`` when a buffer exists.
    '''
    maf = 'maf'
    pos = 'position'
    function_score = 'annotation'
    # Drop the last five characters of the data path (presumably the ".gdat"
    # extension -- TODO confirm) to derive the companion file prefix.
    prefix = self.args.data[:-5]
    key_file = prefix + '.key'
    # Allow for customized key names in gdat file
    try:
        for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
            if x == 'maf':
                maf = y
            elif x == 'position':
                pos = y
            elif x == 'annotation':
                function_score = y
    except Exception:
        # The key file is optional, so any failure to read it falls back to
        # the defaults.  Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        pass
    #
    for group, item in enumerate(self.groups):
        if group >= self.limit:
            break
        data = self.file.getdata(item)
        if self.args.resampling:
            data.decompress()
        else:
            # Without resampling an empty haplotype pool is substituted.
            data['haplotype'] = [[]]
        try:
            loci_input = {
                'pool': data['haplotype'],
                'name': item,
                'maf': list(data[maf]),
                'pos': list(data[pos]),
                'function_score': list(data[function_score]),
            }
        except KeyError as e:
            env.logger.error(
                'Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.'
                .format(e, prefix))
            continue
        loci_input['num_variants'] = len(loci_input['maf'])
        if self.chip_file:
            cdata = self.chip_file.getdata(item)
            if cdata is None or (not is_within(cdata['num_variants'], self.args.def_valid_locus)):
                continue
            # True for every position that the chip data does not contain.
            loci_input['missing'] = [x not in cdata['pos'] for x in loci_input['pos']]
        else:
            loci_input['missing'] = None
        if is_within(loci_input['num_variants'], self.args.def_valid_locus):
            if self.data_buffer is None:
                self.result.append(
                    Calculator(self.args, self.unknown_args, loci_input).run())
            else:
                self.data_buffer.append(loci_input)
        # Groups skipped via ``continue`` above do not reach this update.
        self.pbar.update(group + 1)
def __scan_gdat(self):
    """Scan a gdat file and dispatch each eligible locus group.

    Column names default to ``maf``, ``position`` and ``annotation``.  An
    optional key file (the data path minus its last five characters, plus
    ``.key``) may override them: its first column holds the default name and
    its second column the name actually used in the gdat file.

    For every group in ``self.groups`` (up to ``self.limit``) a locus input
    dict is assembled, annotated with chip-based missingness when a chip file
    is configured, and -- when its variant count passes
    ``is_within(..., self.args.def_valid_locus)`` -- either run through
    ``Calculator`` (result appended to ``self.result``) or queued on
    ``self.data_buffer`` when a buffer exists.
    """
    maf = "maf"
    pos = "position"
    function_score = "annotation"
    # Drop the last five characters of the data path (presumably the ".gdat"
    # extension -- TODO confirm) to derive the companion file prefix.
    prefix = self.args.data[:-5]
    key_file = prefix + ".key"
    # Allow for customized key names in gdat file
    try:
        for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
            if x == "maf":
                maf = y
            elif x == "position":
                pos = y
            elif x == "annotation":
                function_score = y
    except Exception:
        # The key file is optional, so any failure to read it falls back to
        # the defaults.  Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        pass
    #
    for group, item in enumerate(self.groups):
        if group >= self.limit:
            break
        data = self.file.getdata(item)
        if self.args.resampling:
            data.decompress()
        else:
            # Without resampling an empty haplotype pool is substituted.
            data["haplotype"] = [[]]
        try:
            loci_input = {
                "pool": data["haplotype"],
                "name": item,
                "maf": list(data[maf]),
                "pos": list(data[pos]),
                "function_score": list(data[function_score]),
            }
        except KeyError as e:
            env.logger.error(
                "Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.".format(
                    e, prefix
                )
            )
            continue
        loci_input["num_variants"] = len(loci_input["maf"])
        if self.chip_file:
            cdata = self.chip_file.getdata(item)
            if cdata is None or (not is_within(cdata["num_variants"], self.args.def_valid_locus)):
                continue
            # True for every position that the chip data does not contain.
            loci_input["missing"] = [x not in cdata["pos"] for x in loci_input["pos"]]
        else:
            loci_input["missing"] = None
        if is_within(loci_input["num_variants"], self.args.def_valid_locus):
            if self.data_buffer is None:
                self.result.append(Calculator(self.args, self.unknown_args, loci_input).run())
            else:
                self.data_buffer.append(loci_input)
        # Groups skipped via ``continue`` above do not reach this update.
        self.pbar.update(group + 1)
def __scan_gdat(self):
    '''Scan a gdat file and dispatch each eligible locus group.

    Column names default to ``maf``, ``position`` and ``annotation``.  An
    optional key file (the data path minus its last five characters, plus
    ``.key``) may override them: its first column holds the default name and
    its second column the name actually used in the gdat file.

    For every group in ``self.groups`` (up to ``self.limit``) a locus input
    dict is assembled, annotated with chip-based missingness when a chip file
    is configured, and -- when its variant count passes
    ``is_within(..., self.args.def_valid_locus)`` -- either run through
    ``Calculator`` (result appended to ``self.result``) or queued on
    ``self.data_buffer`` when a buffer exists.
    '''
    maf = 'maf'
    pos = 'position'
    function_score = 'annotation'
    # Drop the last five characters of the data path (presumably the ".gdat"
    # extension -- TODO confirm) to derive the companion file prefix.
    prefix = self.args.data[:-5]
    key_file = prefix + '.key'
    # Allow for customized key names in gdat file
    try:
        for x, y in zip(getColumn(key_file, 1), getColumn(key_file, 2)):
            if x == 'maf':
                maf = y
            elif x == 'position':
                pos = y
            elif x == 'annotation':
                function_score = y
    except Exception:
        # The key file is optional, so any failure to read it falls back to
        # the defaults.  Catching ``Exception`` instead of using a bare
        # ``except`` lets KeyboardInterrupt and SystemExit propagate.
        pass
    #
    for group, item in enumerate(self.groups):
        if group >= self.limit:
            break
        data = self.file.getdata(item)
        if self.args.resampling:
            data.decompress()
        else:
            # Without resampling an empty haplotype pool is substituted.
            data['haplotype'] = [[]]
        try:
            loci_input = {'pool': data['haplotype'],
                          'name': item,
                          'maf': list(data[maf]),
                          'pos': list(data[pos]),
                          'function_score': list(data[function_score])}
        except KeyError as e:
            env.logger.error(
                'Column name {} not found. Please provide [{}.key] file to overwrite column naming conventions.'
                .format(e, prefix))
            continue
        loci_input['num_variants'] = len(loci_input['maf'])
        if self.chip_file:
            cdata = self.chip_file.getdata(item)
            if cdata is None or (not is_within(cdata['num_variants'], self.args.def_valid_locus)):
                continue
            # True for every position that the chip data does not contain.
            loci_input['missing'] = [x not in cdata['pos'] for x in loci_input['pos']]
        else:
            loci_input['missing'] = None
        if is_within(loci_input['num_variants'], self.args.def_valid_locus):
            if self.data_buffer is None:
                self.result.append(Calculator(self.args, self.unknown_args, loci_input).run())
            else:
                self.data_buffer.append(loci_input)
        # Groups skipped via ``continue`` above do not reach this update.
        self.pbar.update(group + 1)
def __update_direction(self):
    '''Return the effect-direction label of each variant.

    Every variant starts as ``'d'``.  When ``self.args.def_protective`` is
    set, the labels are recomputed from ``self.data['function_score']``: a
    score for which ``is_within(score, self.args.def_protective)`` holds gets
    ``'p'``, any other score gets ``'d'``.  Afterwards each label whose
    matching entry in ``self.data['function_class']`` equals ``'s'`` is
    replaced by ``'n'`` (neutral site).

    Returns:
        list: labels drawn from ``'d'``, ``'p'`` and ``'n'``.
    '''
    d = ['d'] * self.data['num_variants']
    if self.args.def_protective is not None:
        d = ['p' if is_within(y, self.args.def_protective) else 'd'
             for y in self.data['function_score']]
    # adjust wrt function for neutral sites
    d = ['n' if y == 's' else x for x, y in zip(d, self.data['function_class'])]
    return d
def __update_function(self):
    '''Return the function-class label (``'s'`` or ``'ns'``) of each variant.

    Without ``self.args.def_neutral`` every one of the
    ``self.data['num_variants']`` variants is labelled ``'ns'``.  Otherwise
    one label is produced per entry of ``self.data['function_score']``:
    ``'s'`` when ``is_within(score, self.args.def_neutral)`` holds, ``'ns'``
    when it does not.
    '''
    labels = ['ns'] * self.data['num_variants']
    if self.args.def_neutral is None:
        return labels
    labels = []
    for score in self.data['function_score']:
        if is_within(score, self.args.def_neutral):
            labels.append('s')
        else:
            labels.append('ns')
    return labels
def __update_direction(self):
    """Return the effect-direction label of each variant.

    Every variant starts as ``"d"``.  When ``self.args.def_protective`` is
    set, the labels are recomputed from ``self.data["function_score"]``: a
    score for which ``is_within(score, self.args.def_protective)`` holds gets
    ``"p"``, any other score gets ``"d"``.  Afterwards each label whose
    matching entry in ``self.data["function_class"]`` equals ``"s"`` is
    replaced by ``"n"`` (neutral site).

    Returns:
        list: labels drawn from ``"d"``, ``"p"`` and ``"n"``.
    """
    d = ["d"] * self.data["num_variants"]
    if self.args.def_protective is not None:
        d = ["p" if is_within(y, self.args.def_protective) else "d" for y in self.data["function_score"]]
    # adjust wrt function for neutral sites
    d = ["n" if y == "s" else x for x, y in zip(d, self.data["function_class"])]
    return d
def __scan_sfs(self):
    '''Process the loci of a text SFS file one by one.

    Visits ``self.file.data`` in order and stops once ``self.limit`` loci have
    been seen.  Each locus dict is given an empty haplotype pool plus a
    ``'missing'`` entry (``None`` without a chip file; otherwise a list of
    booleans, ``True`` where the locus position is not found in the chip
    positions).  Loci whose ``'num_variants'`` satisfies
    ``is_within(..., self.args.def_valid_locus)`` are either evaluated with
    ``Calculator`` and stored in ``self.result`` or, when ``self.data_buffer``
    is not ``None``, appended to that buffer.
    '''
    for group, locus in enumerate(self.file.data):
        if group >= self.limit:
            break
        # text sfs file does not have any haplotype pools
        locus['pool'] = [[]]
        if self.chip_file:
            chip_entry = self.chip_file.getdata(locus['name'])
            chip_ok = chip_entry is not None and is_within(
                chip_entry['num_variants'], self.args.def_valid_locus)
            if not chip_ok:
                # Locus absent from the chip file or outside the valid range.
                continue
            flags = []
            for position in locus['pos']:
                flags.append(position not in chip_entry['pos'])
            locus['missing'] = flags
            assert len(locus['missing']) == len(locus['maf'])
        else:
            locus['missing'] = None
        if is_within(locus['num_variants'], self.args.def_valid_locus):
            if self.data_buffer is not None:
                self.data_buffer.append(locus)
            else:
                calculator = Calculator(self.args, self.unknown_args, locus)
                self.result.append(calculator.run())
        # Loci skipped via ``continue`` above do not reach this update.
        self.pbar.update(group + 1)
def __update_direction_random(self):
    '''Randomly relabel some variants as neutral, based on self.swap['direction'].

    Starts from a deep copy of ``self.swap['direction']``.  When
    ``self.args.proportion_detrimental`` is set, a ``'d'`` label becomes
    ``'n'`` if ``rng.random()`` falls below ``1 - proportion_detrimental`` and
    the variant's function score is not within ``self.args.def_disruptive``.
    When ``self.args.proportion_protective`` is set, a ``'p'`` label becomes
    ``'n'`` if ``rng.random()`` falls below ``1 - proportion_protective``.

    Returns:
        list: the updated direction labels.
    '''
    labels = copy.deepcopy(self.swap['direction'])
    if self.args.proportion_detrimental is not None:
        # mark some as neutral
        threshold = 1 - self.args.proportion_detrimental
        updated = []
        for label, score in zip(labels, self.data['function_score']):
            # The random draw is evaluated first, so one number is drawn for
            # every pair regardless of its label.
            if (rng.random() < threshold and label == 'd'
                    and not is_within(score, self.args.def_disruptive)):
                updated.append('n')
            else:
                updated.append(label)
        labels = updated
    if self.args.proportion_protective is not None:
        # mark some as neutral
        threshold = 1 - self.args.proportion_protective
        updated = []
        for label in labels:
            if rng.random() < threshold and label == 'p':
                updated.append('n')
            else:
                updated.append(label)
        labels = updated
    return labels
def __update_direction(self):
    '''Return the effect-direction label of each variant.

    Every variant starts as ``'d'``.  When ``self.args.def_protective`` is
    set, the labels are recomputed from ``self.data['function_score']``: a
    score for which ``is_within(score, self.args.def_protective)`` holds gets
    ``'p'``, any other score gets ``'d'``.  Afterwards each label whose
    matching entry in ``self.data['function_class']`` equals ``'s'`` is
    replaced by ``'n'`` (neutral site).

    Returns:
        list: labels drawn from ``'d'``, ``'p'`` and ``'n'``.
    '''
    d = ['d'] * self.data['num_variants']
    if self.args.def_protective is not None:
        d = [
            'p' if is_within(y, self.args.def_protective) else 'd'
            for y in self.data['function_score']
        ]
    # adjust wrt function for neutral sites
    d = [
        'n' if y == 's' else x
        for x, y in zip(d, self.data['function_class'])
    ]
    return d
def __update_direction_random(self):
    """Randomly relabel some variants as neutral, based on self.swap['direction'].

    Starts from a deep copy of ``self.swap["direction"]``.  When
    ``self.args.proportion_detrimental`` is set, a ``"d"`` label becomes
    ``"n"`` if ``rng.random()`` falls below ``1 - proportion_detrimental`` and
    the variant's function score is not within ``self.args.def_disruptive``.
    When ``self.args.proportion_protective`` is set, a ``"p"`` label becomes
    ``"n"`` if ``rng.random()`` falls below ``1 - proportion_protective``.

    Returns:
        list: the updated direction labels.
    """
    directions = copy.deepcopy(self.swap["direction"])
    if self.args.proportion_detrimental is not None:
        # mark some as neutral
        cutoff = 1 - self.args.proportion_detrimental
        relabelled = []
        for current, score in zip(directions, self.data["function_score"]):
            replacement = current
            # Nested checks keep the original evaluation order: the random
            # draw happens for every pair, then the label test, then the
            # disruptive-range test.
            if rng.random() < cutoff:
                if current == "d":
                    if not is_within(score, self.args.def_disruptive):
                        replacement = "n"
            relabelled.append(replacement)
        directions = relabelled
    if self.args.proportion_protective is not None:
        # mark some as neutral
        cutoff = 1 - self.args.proportion_protective
        relabelled = []
        for current in directions:
            replacement = current
            if rng.random() < cutoff:
                if current == "p":
                    replacement = "n"
            relabelled.append(replacement)
        directions = relabelled
    return directions
def __update_direction_random(self):
    '''Randomly relabel some variants as neutral, based on self.swap['direction'].

    Starts from a deep copy of ``self.swap['direction']``.  When
    ``self.args.proportion_detrimental`` is set, a ``'d'`` label becomes
    ``'n'`` if ``rng.random()`` falls below ``1 - proportion_detrimental`` and
    the variant's function score is not within ``self.args.def_disruptive``.
    When ``self.args.proportion_protective`` is set, a ``'p'`` label becomes
    ``'n'`` if ``rng.random()`` falls below ``1 - proportion_protective``.

    Returns:
        list: the updated direction labels.
    '''
    result = copy.deepcopy(self.swap['direction'])
    if self.args.proportion_detrimental is not None:
        # mark some as neutral
        prob = 1 - self.args.proportion_detrimental
        scores = self.data['function_score']
        stage_one = []
        for tag, score in zip(result, scores):
            # One random number is drawn per pair before any other test.
            neutralize = (rng.random() < prob and tag == 'd'
                          and not is_within(score, self.args.def_disruptive))
            stage_one.append('n' if neutralize else tag)
        result = stage_one
    if self.args.proportion_protective is not None:
        # mark some as neutral
        prob = 1 - self.args.proportion_protective
        stage_two = []
        for tag in result:
            neutralize = rng.random() < prob and tag == 'p'
            stage_two.append('n' if neutralize else tag)
        result = stage_two
    return result
def __update_function(self):
    '''Return the function-class label (``'s'`` or ``'ns'``) of each variant.

    Without ``self.args.def_neutral`` every one of the
    ``self.data['num_variants']`` variants is labelled ``'ns'``.  Otherwise
    one label is produced per entry of ``self.data['function_score']``:
    ``'s'`` when ``is_within(score, self.args.def_neutral)`` holds, ``'ns'``
    when it does not.
    '''
    fallback = ['ns'] * self.data['num_variants']
    if self.args.def_neutral is None:
        return fallback

    def classify(score):
        # Label a single function score against the neutral range.
        return 's' if is_within(score, self.args.def_neutral) else 'ns'

    return [classify(score) for score in self.data['function_score']]
def __update_function(self):
    """Return the function-class label (``"s"`` or ``"ns"``) of each variant.

    Without ``self.args.def_neutral`` every one of the
    ``self.data["num_variants"]`` variants is labelled ``"ns"``.  Otherwise
    one label is produced per entry of ``self.data["function_score"]``:
    ``"s"`` when ``is_within(score, self.args.def_neutral)`` holds, ``"ns"``
    when it does not.
    """
    classes = ["ns"] * self.data["num_variants"]
    if self.args.def_neutral is not None:
        classes = []
        for score in self.data["function_score"]:
            neutral = is_within(score, self.args.def_neutral)
            classes.append("s" if neutral else "ns")
    return classes