def parser(self, log_file):
    """
    @summary: Parse the command results to add information in log_file.
              Counts the records found in self.output_seq and appends a
              'Results' section holding this count to the log.
    @param log_file: [str] Path to the sample process log file.
    """
    # Parse output: every record of the output file counts as one sequence.
    nb_cleaned = 0
    FH_output_seq = SequenceFileReader.factory(self.output_seq)
    try:
        for _ in FH_output_seq:
            nb_cleaned += 1
    finally:
        # Release the reader even if the iteration fails on a malformed record.
        FH_output_seq.close()

    # Write result
    FH_log = Logger(log_file)
    try:
        FH_log.write('Results:\n')
        FH_log.write("\tnb seq with 3' primer : " + str(nb_cleaned) + '\n')
    finally:
        # Release the log handle even if one of the writes fails.
        FH_log.close()
def parser(self, log_file):
    """
    @summary: Parse the command results to add information in log_file.
              The number of records read from self.output_seq is written
              in a 'Results' section of the log.
    @param log_file: [str] Path to the sample process log file.
    """
    # Parse output
    FH_output_seq = SequenceFileReader.factory(self.output_seq)
    nb_cleaned = 0
    try:
        # Only the number of records matters, their content is not used.
        for _ in FH_output_seq:
            nb_cleaned += 1
    finally:
        # Always close the reader, including when the parsing raises.
        FH_output_seq.close()

    # Write result
    FH_log = Logger(log_file)
    try:
        FH_log.write('Results:\n')
        FH_log.write("\tnb seq with 3' primer : " + str(nb_cleaned) + '\n')
    finally:
        # Always close the log, including when a write raises.
        FH_log.close()
def get_seq_length(input_file, size_separator=None):
    """
    @summary: Returns the number of sequences by sequences lengths.
    @param input_file: [str] The sequence file path.
    @param size_separator: [str] If it exists the size separator in sequence ID.
                           When provided, the text after the last separator of
                           the ID is converted to int and used as the number of
                           sequences represented by the record.
    @return: [dict] By sequences lengths the number of sequence. The keys are
             the lengths converted to str.
    """
    nb_by_length = dict()
    FH_seq = SequenceFileReader.factory(input_file)
    try:
        for record in FH_seq:
            # Without size separator each record stands for one sequence.
            nb_seq = 1
            if size_separator is not None:
                nb_seq = int(record.id.rsplit(size_separator, 1)[-1])
            length_key = str(len(record.string))
            # dict.get() replaces dict.has_key(), which does not exist in Python 3.
            nb_by_length[length_key] = nb_by_length.get(length_key, 0) + nb_seq
    finally:
        # Release the reader even if a record cannot be parsed.
        FH_seq.close()
    return nb_by_length
def get_seq_length(input_file, size_separator=None):
    """
    @summary: Returns the number of sequences by sequences lengths.
    @param input_file: [str] The sequence file path.
    @param size_separator: [str] If it exists the size separator in sequence ID.
                           The part of the ID located after the last separator
                           is read as the abundance (int) of the record.
    @return: [dict] By sequences lengths the number of sequence. Lengths are
             stored as str keys.
    """
    nb_by_length = dict()
    FH_seq = SequenceFileReader.factory(input_file)
    try:
        for record in FH_seq:
            if size_separator is None:
                # No abundance in the ID: the record counts once.
                nb_seq = 1
            else:
                nb_seq = int(record.id.rsplit(size_separator, 1)[-1])
            seq_length = str(len(record.string))
            # Portable replacement for dict.has_key() (removed in Python 3).
            nb_by_length[seq_length] = nb_by_length.get(seq_length, 0) + nb_seq
    finally:
        # Close the reader whether the parsing succeeds or raises.
        FH_seq.close()
    return nb_by_length
# Get observation sequences observation_id_by_seq = dict() FH_seeds = FastaIO(args.seeds_fasta) for record in FH_seeds: if record.string in observation_id_by_seq: raise Exception("The OTU '" + observation_id_by_seq[record.string] + "' and '" + record.id + "' have the same sequence.") observation_id_by_seq[record.string] = record.id.split(";size=")[0] FH_seeds.close() # Get centroids of observation reference_by_observation_id = dict() for file in args.reads: FH_reads = SequenceFileReader.factory(file) for record in FH_reads: if record.string in observation_id_by_seq: observation_id = observation_id_by_seq[record.string] reference_id = re.search("reference=([^\s]+)", record.description).group(1) if observation_id not in reference_by_observation_id: reference_by_observation_id[observation_id] = reference_id elif len(reference_by_observation_id[observation_id].split( ",")) > len(reference_id.split(",")): reference_by_observation_id[observation_id] = reference_id FH_reads.close() if len(observation_id_by_seq) != len(reference_by_observation_id): missing = list() for seed_seq in observation_id_by_seq: if observation_id_by_seq[
# Get observation sequences nb_observations = 0 observation_ids_by_seq = dict() FH_seeds = FastaIO(args.input) for record in FH_seeds: nb_observations += 1 if record.string not in observation_ids_by_seq: observation_ids_by_seq[record.string] = list() observation_ids_by_seq[record.string].append(record.id) FH_seeds.close() # Get centroids (the real centroid and indentical sequences) ID by observation observation_ids_by_centroid_id = dict() for file in args.trimmed_reads: FH_reads = SequenceFileReader.factory(file) for record in FH_reads: record_seq = record.string.replace("-", "").replace(".", "") if record_seq in observation_ids_by_seq: observation_ids_by_centroid_id[record.id] = observation_ids_by_seq[record_seq] FH_reads.close() # Get reference by observation reference_by_observation_id = dict() for file in args.reads: FH_reads = SequenceFileReader.factory(file) for record in FH_reads: if record.id in observation_ids_by_centroid_id: observation_ids = observation_ids_by_centroid_id[record.id] reference_id = re.search("reference=([^\s]+)", record.description).group(1) for current_obs_id in observation_ids: