def make_empty_library_ini(type, microhaplotypes=False):
    """Build an empty library file template with explanatory comments.

    A RawConfigParser is created, the sections that apply to the requested
    kind of library are added to it (without any options), and a usage
    comment is attached to every section through ``ini.add_comment``
    (``ini_add_comment`` bound to the parser instance).

    Args:
        type: Kind of library to generate. The values tested for here are
            "smart", "str", "non-str" and "full"; the value decides which
            sections are created and how their comments are worded. The
            name shadows the builtin, but it is part of the signature and
            is therefore kept.
        microhaplotypes: If true, the [microhaplotype_positions] section is
            added even when `type` is not "full".

    Returns:
        The RawConfigParser holding the empty, commented sections.
    """
    ini = RawConfigParser(allow_no_value=True)
    ini.optionxform = str  # Keep option names case-sensitive.
    ini.add_comment = MethodType(ini_add_comment, ini)

    # Conditions that are used by more than one section below.
    not_str_only = type != "str"
    has_str_sections = type in ("str", "full")
    has_no_repeat_section = type in ("non-str", "full")

    # [genome_position]
    mtdna_build = " and rCRS for human mtDNA" if not_str_only else ""
    auto_config_note = (
        " This section is required for automatic configuration of markers; it is optional "
        "for markers that are explicitly configured in this library file."
        if type != "smart" else "")
    ini.add_section("genome_position")
    ini.add_comment(
        "genome_position",
        "Specify the chromosome number and positions of the first and last reported nucleotide of "
        "each marker (both inclusive, using human genome build GRCh38%s). This range should not "
        "include the primer binding sites.%s" % (mtdna_build, auto_config_note))
    if not_str_only:
        ini.add_comment(
            "genome_position",
            "Specify 'M' as the chromosome name for markers on mitochondrial "
            "DNA. Allele names generated for these markers will follow mtDNA "
            "nomenclature guidelines (Parson et al., 2014). If one of your "
            "mtDNA fragments starts near the end of the reference sequence "
            "and continues at the beginning, you can obtain correct base "
            "numbering by specifying the fragment's genome position as \"M, "
            "(starting position), 16569, 1, (ending position)\". This tells "
            "FDSTools that the marker is a concatenation of two fragments, "
            "where the first fragment ends at position 16569 and the second "
            "fragment starts at position 1. Similarly, for a fragment that "
            "spans position 3107 in the rCRS (which is nonexistent), you may "
            "specify \"M, (starting position), 3106, 3108, (ending "
            "position)\".")

    # [microhaplotype_positions]
    if microhaplotypes or type == "full":
        position_note = (
            " If the [genome_position] of the marker is given, positions must be within the "
            "given range. Otherwise, the reference sequence must be explicitly provided in "
            "the [no_repeat] section, and position 1 refers to the first base in the reference "
            "sequence." if has_no_repeat_section else "")
        ini.add_section("microhaplotype_positions")
        ini.add_comment(
            "microhaplotype_positions",
            "For each microhaplotype marker, specify one or more positions of SNPs that should "
            "be reported as part of the microhaplotype.%s" % position_note)

    # [flanks]
    if type in ("str", "non-str"):
        flank_usage = (
            "Specify two comma-separated values: left flank and right flank sequence, in the same "
            "sequence orientation (strand).")
    else:
        explicit_anchor_note = (
            " For markers configured explicitly in this library file, the anchor sequences "
            "must be specified explicitly as well." if type == "full" else "")
        flank_usage = (
            "The default length of the anchor sequences used can be specified as an argument to "
            "the TSSV tool. Individual alternative lengths can be specified here for each marker. "
            "Specify two comma-separated values: one for the left and one for the right flank. "
            "The value can be a number (the length of sequence to use) or an explicit anchor "
            "sequence to use.%s" % explicit_anchor_note)
    ini.add_section("flanks")
    ini.add_comment(
        "flanks",
        "The TSSV tool will use a pair of short anchor sequences just outside the reported range "
        "of each marker (e.g., primer sequences) to identify which input reads correspond to "
        "which marker. %s The sequence may contain IUPAC codes for ambiguous positions to account "
        "for degenerate bases in the primers or for bisulfite-converted targets in methylation-"
        "based studies (e.g., Y matches either C or T)." % flank_usage)

    # [max_expected_copies]
    ini.add_section("max_expected_copies")
    ini.add_comment(
        "max_expected_copies",
        "By default, the Allelefinder tool will report up to 2 alleles per marker, but only a "
        "single allele for markers %son the Y chromosome. If this is incorrect, specify the "
        "maximum expected number of copies (i.e., alleles) for each marker in a "
        "single-contributor reference sample here." %
        ("on mitochondrial DNA or " if not_str_only else ""))

    # [expected_allele_length]
    ini.add_section("expected_allele_length")
    ini.add_comment(
        "expected_allele_length",
        "Specify one or two values for each marker. The first value gives the "
        "expected minimum length (in nucleotides, %sexcluding flanks) of the "
        "alleles and the second value (if given) specifies the maximum allele "
        "length expected for that marker (both inclusive). The TSSV tool will filter "
        "sequences that have a length outside this range." %
        ("including prefix and suffix, " if has_str_sections else ""))

    # Sections for explicitly-configured STR markers.
    if has_str_sections:
        ini.add_section("prefix")
        ini.add_comment(
            "prefix",
            "For explicitly-configured STR markers: Specify the prefix sequence of each STR "
            "marker. The prefix is the sequence between the left flank and the repeat and is "
            "omitted from allele names. The sequence is used as the reference sequence for that "
            "marker when generating allele names. Deviations from the reference are expressed as "
            "variants.")
        ini.add_section("suffix")
        ini.add_comment(
            "suffix",
            "For explicitly-configured STR markers: Specify the suffix sequence of each STR "
            "marker. The suffix is the sequence between the repeat and the right flank. The "
            "sequence is used as the reference sequence for that marker when generating allele "
            "names.")
        ini.add_section("repeat")
        ini.add_comment(
            "repeat",
            "For explicitly-configured STR markers: Specify the repeat structure of each STR "
            "marker in space-separated triples of sequence, minimum number of repeats, and "
            "maximum number of repeats.")
        ini.add_section("length_adjust")
        ini.add_comment(
            "length_adjust",
            "For explicitly-configured STR markers: To prevent discrepancies between traditional "
            "CE allele numbers and the CE number in FDSTools allele names, the CE allele number "
            "as calculated by FDSTools is based on the length of the repeat sequence minus the "
            "adjustment specified here.")
        ini.add_section("block_length")
        ini.add_comment(
            "block_length",
            "For explicitly-configured STR markers: Specify the repeat unit length of each STR "
            "marker. By default, the length of the repeat unit of the longest repeat is used.")

    # Section for explicitly-configured non-STR markers.
    if has_no_repeat_section:
        ini.add_section("no_repeat")
        ini.add_comment(
            "no_repeat",
            "For explicitly-configured non-STR markers: Specify the reference sequence for each "
            "non-STR marker.")
    return ini