Example #1
0
	def __init__(self, name, sequence, markers):
		"""Build a Genotype on top of the Haplotype base state.

		The three arguments are forwarded unchanged to ``Haplotype.__init__``,
		which provides the inherited part of the object:
			- a name,
			- a sequence,
			- a size (the length of the markers),
			- the list of markers.

		Genotype-specific state, all initialised empty / to zero here.

		Filled during the 1st run:
			- number of homozygous (Hmz) markers ('A', 'C', 'G' or 'T';
			  possibly '--' for a new haplotype),
			- number of heterozygous (Htz) markers (e.g. '--' or 'A/G'),
			- list of the indexes of the Htz markers in the genotype sequence,
			- list of haplotypes similar to this genotype, and its size,
			- list of probable combinations of similar haplotypes that yield
			  this genotype, and its size,
			- list of new haplotypes, filled when there is at least one similar
			  haplotype but no probable combination, and its size.

		Filled during the 2nd run:
			- a second list of probable combinations, computed once the new
			  haplotypes created during the 1st run have been added to the
			  similar haplotypes, and its size.
		"""
		# Inherited Haplotype state.
		Haplotype.__init__(self, name, sequence, markers)

		# Marker counters, both starting at zero.
		self._hmz_nb_of_markers = self._htz_nb_of_markers = 0
		# Positions of the Htz markers in the genotype sequence; kept to speed
		# up comparing haplotypes that may combine into this genotype's sequence.
		self._index_htz_markers = list()

		# Similar haplotypes: a list of sequences (each itself a list of
		# characters), i.e. the half_similarity inherited from Haplotype,
		# together with the size of that list.
		self._similar_haplotype, self._number_of_similar_haplotype = list(), 0

		# Probable haplotype combinations: a list of lists
		# (e.g. [[haplo1, haplo4], [haplo20, haplo79]]) and its size.
		self._probable_haplotypes_combinaison, self._number_of_probable_haplotypes_combinaison = list(), 0

		# New haplotype(s) created from this Genotype object (for now only
		# their sequences) and the size of that list.
		self._lst_of_new_haplotype, self._number_of_new_created_haplotype = list(), 0

		# Combination list rebuilt during the 2nd run, and its size.
		self._probable_haplotypes_combinaison_2_run, self._number_of_probable_haplotypes_combinaison_2_run = list(), 0
Example #2
0
	def __init__(self, name, sequence, markers):
		"""Build a Candidat on top of the Haplotype base state.

		The three arguments are forwarded unchanged to ``Haplotype.__init__``,
		which provides the inherited part of the object:
			- a name,
			- a sequence,
			- a size (the length of the markers),
			- the list of markers.

		Two Candidat-specific attributes are then created as empty strings.
		"""
		# Inherited Haplotype state.
		Haplotype.__init__(self, name, sequence, markers)

		# Candidat-specific fields, empty until assigned elsewhere.
		# NOTE(review): presumably the originating genotype and haplotype -
		# confirm against the code that fills them.
		self._geno_ori = self._haplo_ori = ""
Example #3
0
	def __init__(self, name, sequence, markers):
		"""Build a FastPhase on top of the Haplotype base state.

		The three arguments are forwarded unchanged to ``Haplotype.__init__``,
		which provides the inherited part of the object:
			- a name,
			- a sequence,
			- a size (the length of the markers),
			- the list of markers.

		Two FastPhase-specific attributes are then created as empty strings.
		"""
		# Inherited Haplotype state.
		Haplotype.__init__(self, name, sequence, markers)

		# FastPhase-specific fields, empty until assigned elsewhere.
		# NOTE(review): presumably estimates read from fastPHASE output -
		# confirm against the code that fills them.
		self._estimated_freq = self._estimated_square = ""
Example #4
0
def pattern_selection(project_directory, **kwargs):
    """Run the pattern-selection step and record its outputs.

    The function resolves its parameters through ``_set_parameters``,
    validates the loci arguments, reads the paths and settings stored in the
    preprocessing history of the parent directory, loads and filters the
    patterns, asks ``_get_minimum_spanning_set`` for the best set, and then
    writes the haplotype, amplicon, pattern and summary files. Every
    produced path and the elapsed run time are logged in a new history file.

    Args:
        project_directory: Object used to create the output files and to
            locate the parent preprocessing history.
        **kwargs: Options forwarded as-is to ``_set_parameters``.
    """
    log = logging.getLogger(__name__)
    log.info("BEGIN Pattern Selection")
    params = _set_parameters(**kwargs)
    started = time.time()
    _check_inputs(params['max_loci'], params['required_loci'],
                  params['exclude_loci'])

    # History of this run.
    history_path = project_directory.make_new_file(
        "history", "pattern_selection_history")
    history = History(history_path, "Pattern_Selection",
                      project_directory.timestamp, param_dict=params)

    # Existing history of the preprocessing step in the parent directory.
    prep_history_name = "preprocessing_history_{}.txt".format(
        project_directory.get_parent_directory_timestamp())
    prep_history = History(
        project_directory.get_parent_subdirectory_file(
            "history", prep_history_name),
        "Preprocessing",
        exists=True)

    # Inputs recorded by the preprocessing step.
    json_file = prep_history.get_path("PATTERN JSON")
    variant_matrix = prep_history.get_path("VARIANT SITE MATRIX FILE")
    separators = {'comma': ",", "space": " ", "tab": "\t"}
    sep = separators[prep_history.get_parameter("SEP")]
    flag_file = prep_history.get_path("PRIMER ZONE FLAGS")
    primer_zone_size = prep_history.get_parameter("PZ_SIZE")

    history.add_path("PATTERN JSON", json_file)
    log.info("Reading from pattern JSON: %s", json_file)

    # Load the patterns, then apply each non-empty filter in turn.
    patterns = Patterns()
    patterns.load_patterns(json_file)
    filters = (
        ('exclude_loci', patterns.remove_sites),
        ('required_loci', patterns.add_required_sites),
        ('exclude_strains', patterns.remove_strains),
    )
    for key, apply_filter in filters:
        if len(params[key]) > 0:
            apply_filter(params[key])
    patterns.set_resolution(params['res'], params['stop_at_res'])

    best_set = _get_minimum_spanning_set(
        patterns,
        params['reps'],
        params['max_loci'],
        params['max_res'],
        params['n_threads'],
        int(prep_history.get_parameter("PZ_SIZE")))

    # Reserve every output file before anything is written.
    new_file = project_directory.make_new_file
    haplotype_file = new_file("minimum_spanning_set", ".haplotype", "csv")
    amplicon_json = new_file("minimum_spanning_set", ".amplicons", "json")
    haplotype_matrix = new_file("minimum_spanning_set", "haplotypes", "csv")
    amplicon_matrix = new_file("minimum_spanning_set", "amplicons", "csv")
    pattern_matrix = new_file("minimum_spanning_set", "patterns", "csv")
    summary_file = new_file("summary", "summary")

    haplotype = Haplotype(patterns, best_set, flag_file, primer_zone_size,
                          variant_matrix, sep)

    # Single-file outputs: write the file, then log its path.
    single_outputs = (
        (haplotype.write_haplotype, "Haplotype File", haplotype_file),
        (haplotype.write_json, "Amplicon JSON", amplicon_json),
        (haplotype.write_summary, "Summary", summary_file),
    )
    for write, label, path in single_outputs:
        write(path)
        history.add_path(label, path)

    # The three matrices are produced by one call and logged afterwards.
    haplotype.write_output(haplotype_matrix, pattern_matrix, amplicon_matrix)
    matrix_outputs = (
        ("Haplotype Matrix", haplotype_matrix),
        ("Amplicon Matrix", amplicon_matrix),
        ("Pattern Matrix", pattern_matrix),
    )
    for label, path in matrix_outputs:
        history.add_path(label, path)

    log.info("FINISHED Pattern Selection")
    history.add_other("Run Time", time.time() - started)
    history.write()