def __post_init__(self):
     """Prepare the download directory, the KEGG output paths and the logger.

     Creates ``self.download_directory`` when it does not exist yet, then
     builds the result and error file paths inside ``self.output_directory``
     from the prefix that ``StrConverter.extract_file_name`` returns for the
     base name of ``self.input_path``.
     """
     os.makedirs(self.download_directory, exist_ok=True)

     prefix = StrConverter.extract_file_name(
         os.path.basename(self.input_path))
     out_dir = self.output_directory
     self.kegg_result_path = os.path.join(
         out_dir, '%s_kegg_result.txt' % prefix)
     self.kegg_error_path = os.path.join(
         out_dir, '%s_kegg_error.txt' % prefix)

     self.logger = LoggerFactory()
    def __init__(self, file_path, ignore_gene=False, enable_debug_info=False):
        """Store the caller's settings and reset the parsed state.

        Args:
            file_path: stored unchanged as ``self.file_path``.
            ignore_gene: stored unchanged as ``self.ignore_gene``.
            enable_debug_info: stored unchanged as ``self.enable_debug_info``.
        """
        # Settings handed in by the caller.
        self.file_path = file_path
        self.ignore_gene = ignore_gene
        self.enable_debug_info = enable_debug_info

        # Empty containers and placeholders; nothing is populated here.
        self.gene_segments, self.dna_code = [], []
        self.gene_name_segment_map = {}
        self.source = None

        self.logger = LoggerFactory(1)
 def __post_init__(self):
     """Create the logger and derive the four result file paths.

     Each path is placed in ``self.output_directory`` and named from the
     prefix that ``StrConverter.extract_file_name`` returns for
     ``self.rna_tag``.
     """
     self.logger = LoggerFactory(1)
     prefix = StrConverter.extract_file_name(self.rna_tag)

     # (attribute name, file-name template) pairs, assigned in this order.
     outputs = (
         ('cluster_result_path', '%s_cluster_result.txt'),
         ('sample_result_path', '%s_sample_result.txt'),
         ('all_result_path', '%s_all_result.txt'),
         ('only_result_path', '%s_only_result.txt'),
     )
     for attr_name, template in outputs:
         setattr(self, attr_name,
                 os.path.join(self.output_directory, template % prefix))
Exemple #4
0
 def __post_init__(self):
     """Resolve the mode-specific paths, the result path and helper state.

     ``self.input_path`` is exposed as ``inter_path`` when ``self.mode`` is
     'inter' and as ``rna_path`` when it is 'rna'; whichever attribute does
     not match the mode is set to None.
     """
     mode = self.mode
     if mode == 'inter':
         self.inter_path = self.input_path
     else:
         self.inter_path = None
     if mode == 'rna':
         self.rna_path = self.input_path
     else:
         self.rna_path = None

     prefix = StrConverter.extract_file_name(
         os.path.basename(self.input_path))
     # self.limit is only read in 'rna' mode.
     if mode == 'rna':
         suffix = 'stream_%d' % self.limit
     else:
         suffix = 'gene'
     result_name = '%s_%s_result.txt' % (prefix, suffix)
     self.result_path = os.path.join(self.output_directory, result_name)

     self.gene_reader = GeneFileReader(self.data_path)
     self.logger = LoggerFactory()
     self.headers = {}
     self.inv_headers = []
Exemple #5
0
    def __post_init__(self):
        """Derive the output path, helpers and counters, then check weights.

        The result file is placed in ``self.output_directory`` and named from
        the prefix that ``StrConverter.extract_file_name`` returns for the
        base name of ``self.gene_path``.

        Raises:
            AssertionError: if ``self.weighted`` does not contain exactly
                five entries or its sum is not positive.
        """
        self.data_name = os.path.basename(self.data_path)
        file_name = os.path.basename(self.gene_path)
        file_prefix = StrConverter.extract_file_name(file_name)
        self.result_path = os.path.join(self.output_directory,
                                        '%s_match_result.txt' % (file_prefix))
        self.gene_reader = GeneFileReader(self.data_path)
        self.dna_code = None
        self.rev_dna_code = None
        self.logger = LoggerFactory()

        # NOTE(review): presumably the lock guards the counters below when
        # several threads update them -- confirm against the callers.
        self.lock = threading.Lock()
        self.solved = 0
        self.total = 0
        self.weighted_sum = sum(self.weighted)
        # Explicit raise instead of ``assert``: assert statements are removed
        # when Python runs with -O, which would silently skip this check.
        # AssertionError is kept so callers observe the same exception type.
        if not (self.weighted_sum > 0 and len(self.weighted) == 5):
            raise AssertionError(
                'weighted must contain exactly 5 values with a positive sum, '
                'got %r' % (self.weighted,))
 def find_neighbor_batch(self, datas, iteration_time):
     """Process one batch, appending successes to the neighbor result file.

     For every item ``self.download_and_analysis`` is called. Successful
     items are appended to ``self.neighbor_result_path``; failed items are
     recorded in this iteration's error file and any file at the item's
     download path is removed.

     Args:
         datas: sized iterable of ``(key, inter, additional)`` items.
             ``inter`` is indexed as ``inter[0]`` / ``inter[1]``;
             ``additional`` is either '' or comma-separated ``k=v`` pairs.
         iteration_time: iteration number, used in the log messages and in
             the name of the error file.

     Returns:
         A list of ``[key, inter]`` pairs, one per failed item.
     """
     total_cnt = len(datas)
     solve_cnt, success_cnt = 0, 0
     fail_datas = []
     error_path = (self.error_result_path_prefix +
                   ".iter-%d.txt" % iteration_time)
     logger = LoggerFactory(1)
     # Context managers close both files even when a download or a write
     # raises; previously the error file was never closed at all.
     with open(self.neighbor_result_path, 'a') as fw, \
             open(error_path, 'w') as fe:
         if total_cnt:
             # Skipped for an empty batch, where the percentage below would
             # divide by zero.
             logger.info_with_expire_time(
                 '[Iteration %d]completed %d/%d=%.2f%%' %
                 (iteration_time, solve_cnt, total_cnt,
                  solve_cnt * 100.0 / total_cnt), solve_cnt, total_cnt)
         for key, inter, additional in datas:
             solve_cnt += 1
             file_path = os.path.join(self.download_directory, key + '.txt')
             flag, data = self.download_and_analysis(key, inter, file_path)
             if flag:
                 success_cnt += 1
                 direction = '+' if (inter[0] < inter[1]) else '-'
                 fw.write('>%s/%s-%s(%s)\n' %
                          (key, inter[0], inter[1], direction))
                 if additional != '':
                     for kv in additional.split(','):
                         # maxsplit=1 keeps values that contain '=' intact
                         # instead of failing the tuple unpacking.
                         k, v = kv.split('=', 1)
                         fw.write('%s\t%s\n' % (k, v))
                 fw.write('SOURCE\t%s\n' % (data.get('source', 'UNKNOWN')))
                 for elem in data['data']:
                     fw.write('%s\n' % elem)
                 fw.write('sequence\t%s\n' % (data.get('sequence', '')))
                 fw.write('\n')
                 # Flush per record so finished work is on disk right away.
                 fw.flush()
             else:
                 if os.path.exists(file_path):
                     os.remove(file_path)
                 fe.write('>%s/%s-%s\n' % (key, inter[0], inter[1]))
                 fe.flush()
                 # NOTE(review): ``additional`` is dropped here, so the
                 # returned items cannot be fed straight back into this
                 # method, which unpacks three values -- confirm the caller
                 # re-attaches it.
                 fail_datas.append([key, inter])
             self.logger.info_with_expire_time(
                 '[Iteration %d]completed %d/%d=%.2f%%, success %d/%d=%.2f%%' %
                 (iteration_time, solve_cnt, total_cnt,
                  solve_cnt * 100.0 / total_cnt, success_cnt, solve_cnt,
                  success_cnt * 100.0 / solve_cnt), solve_cnt, total_cnt)
             # Random pause of up to one second between items.
             time.sleep(random.random())
     self.logger.info('[Iteration %d]done .' % iteration_time)
     return fail_datas
    def __post_init__(self):
        """Create the logger, derive the result paths and the error folder.

        All result paths are placed in ``self.output_directory`` and named
        from the prefix that ``StrConverter.extract_file_name`` returns for
        the base name of ``self.input_path``. An ``error`` sub-directory is
        created there as well, and ``self.error_result_path_prefix`` points
        inside it.
        """
        self.logger = LoggerFactory(3)

        file_name = os.path.basename(self.input_path)
        file_prefix = StrConverter.extract_file_name(file_name)
        self.neighbor_result_path = os.path.join(
            self.output_directory, '%s_neighbor_result.txt' % file_prefix)
        self.next_gene_result_path = os.path.join(
            self.output_directory, '%s_next_neighbor_result.txt' % file_prefix)
        self.source_count_path = os.path.join(
            self.output_directory, '%s_source_count_result.txt' % file_prefix)
        self.gene_count_path = os.path.join(
            self.output_directory, '%s_gene_count_result.txt' % file_prefix)

        error_directory = os.path.join(self.output_directory, 'error')
        # exist_ok=True replaces the exists()-then-makedirs() pair, which
        # could raise if the directory appeared between the two calls.
        os.makedirs(error_directory, exist_ok=True)
        self.error_result_path_prefix = os.path.join(
            error_directory, '%s_error_result' % file_prefix)
    def __post_init__(self):
        """Unpack the EcoCyc options and derive the result and error paths.

        Every option listed below is copied from ``self.ecocyc_params`` onto
        an attribute of the same name; a missing key raises ``KeyError``.
        """
        options = self.ecocyc_params
        for option_name in (
                'from_gene_names',
                'output_best_promoter',
                'output_detail_information',
                'analysis_promoter',
                'if_get_summary',
                'if_get_go_table',
        ):
            setattr(self, option_name, options[option_name])

        # Placeholders and empty containers; nothing is populated here.
        self.sequence_start_idx = self.sequence_end_idx = None
        self.headers, self.inv_headers = {}, []

        prefix = StrConverter.extract_file_name(
            os.path.basename(self.input_path))
        out_dir = self.output_directory
        self.ecocyc_result_path = os.path.join(
            out_dir, '%s_ecocyc_result.txt' % prefix)
        self.ecocyc_error_path = os.path.join(
            out_dir, '%s_ecocyc_error.txt' % prefix)
        self.logger = LoggerFactory()