コード例 #1
0
def make_empty_library_ini(type, microhaplotypes=False):
    """Build an empty library-file skeleton with explanatory comments.

    Creates a RawConfigParser that contains only the sections relevant to the
    requested library type. Every section receives one or more comments that
    explain how to fill it in; no marker entries are added.

    Sections are created in this order: [genome_position],
    [microhaplotype_positions] (optional), [flanks], [max_expected_copies],
    [expected_allele_length], then [prefix], [suffix], [repeat],
    [length_adjust] and [block_length] for "str"/"full", and finally
    [no_repeat] for "non-str"/"full".

    Args:
        type: Library flavour. The code branches on the values "smart",
            "str", "non-str" and "full". Other values are not rejected; they
            simply fall through the same comparisons. The name shadows the
            ``type`` builtin but is kept for backward compatibility with
            callers that pass it by keyword.
        microhaplotypes: If true, the [microhaplotype_positions] section is
            included even when `type` is not "full".

    Returns:
        The populated RawConfigParser. It additionally carries an
        ``add_comment(section, text)`` method bound from ``ini_add_comment``.
    """
    ini = RawConfigParser(allow_no_value=True)
    # Keep option names case-sensitive; the default optionxform lowercases.
    ini.optionxform = str
    # Bind the module-level helper as a method of this parser instance only.
    # NOTE(review): presumably it stores the text as a value-less option,
    # which is why allow_no_value is enabled above -- confirm in the helper.
    ini.add_comment = MethodType(ini_add_comment, ini)

    # Type-dependent switches shared by several of the comments below.
    mentions_mtdna = type != "str"
    has_explicit_str = type in ("str", "full")
    has_explicit_non_str = type in ("non-str", "full")

    # Create sections and add comments to explain how to use them.
    ini.add_section("genome_position")
    ini.add_comment(
        "genome_position",
        "Specify the chromosome number and positions of the first and last reported nucleotide of "
        "each marker (both inclusive, using human genome build GRCh38%s). This range should not "
        "include the primer binding sites.%s" %
        (" and rCRS for human mtDNA" if mentions_mtdna else "",
         " This section is required for automatic configuration of markers; it is optional "
         "for markers that are explicitly configured in this library file."
         if type != "smart" else ""))
    if mentions_mtdna:
        ini.add_comment(
            "genome_position",
            "Specify 'M' as the chromosome name for markers on mitochondrial "
            "DNA. Allele names generated for these markers will follow mtDNA "
            "nomenclature guidelines (Parson et al., 2014). If one of your "
            "mtDNA fragments starts near the end of the reference sequence "
            "and continues at the beginning, you can obtain correct base "
            "numbering by specifying the fragment's genome position as \"M, "
            "(starting position), 16569, 1, (ending position)\". This tells "
            "FDSTools that the marker is a concatenation of two fragments, "
            "where the first fragment ends at position 16569 and the second "
            "fragment starts at position 1. Similarly, for a fragment that "
            "spans position 3107 in the rCRS (which is nonexistent), you may "
            "specify \"M, (starting position), 3106, 3108, (ending "
            "position)\".")
    if microhaplotypes or type == "full":
        ini.add_section("microhaplotype_positions")
        ini.add_comment(
            "microhaplotype_positions",
            "For each microhaplotype marker, specify one or more positions of SNPs that should "
            "be reported as part of the microhaplotype.%s" %
            (" If the [genome_position] of the marker is given, positions must be within the "
             "given range. Otherwise, the reference sequence must be explicitly provided in "
             "the [no_repeat] section; position 1 refers to the first base in the reference "
             "sequence." if has_explicit_non_str else ""))

    # The flank instructions differ: fully explicit libraries ("str",
    # "non-str") always take sequences, the others also accept lengths.
    if type in ("str", "non-str"):
        flank_instructions = (
            "Specify two comma-separated values: left flank and right flank sequence, in the same "
            "sequence orientation (strand).")
    else:
        flank_instructions = (
            "The default length of the anchor sequences used can be specified as an argument to "
            "the TSSV tool. Individual alternative lengths can be specified here for each marker. "
            "Specify two comma-separated values: one for the left and one for the right flank. "
            "The value can be a number (the length of sequence to use) or an explicit anchor "
            "sequence to use.%s" %
            (" For markers configured explicitly in this library file, the anchor sequences "
             "must be specified explicitly as well." if type == "full" else ""))
    ini.add_section("flanks")
    ini.add_comment(
        "flanks",
        "The TSSV tool will use a pair of short anchor sequences just outside the reported range "
        "of each marker (e.g., primer sequences) to identify which input reads correspond to "
        "which marker. %s The sequence may contain IUPAC codes for ambiguous positions to account "
        "for degenerate bases in the primers or for bisulfite-converted targets in methylation-"
        "based studies (e.g., Y matches either C or T)." % flank_instructions)
    ini.add_section("max_expected_copies")
    ini.add_comment(
        "max_expected_copies",
        "By default, the Allelefinder tool will report up to 2 alleles per marker, but only a "
        "single allele for markers %son the Y chromosome. If this is incorrect, specify the "
        "maximum expected number of copies (i.e., alleles) for each marker in a "
        "single-contributor reference sample here." %
        ("on mitochondrial DNA or " if mentions_mtdna else ""))
    ini.add_section("expected_allele_length")
    ini.add_comment(
        "expected_allele_length",
        "Specify one or two values for each marker. The first value gives the "
        "expected minimum length (in nucleotides, %sexcluding flanks) of the "
        "alleles and the second value (if given) specifies the maximum allele "
        "length expected for that marker (both inclusive). The TSSV tool will filter "
        "sequences that have a length outside this range." %
        ("including prefix and suffix, " if has_explicit_str else ""))

    # Sections that only apply to explicitly-configured STR markers.
    if has_explicit_str:
        ini.add_section("prefix")
        ini.add_comment(
            "prefix",
            "For explicitly-configured STR markers: Specify the prefix sequence of each STR "
            "marker. The prefix is the sequence between the left flank and the repeat and is "
            "omitted from allele names. The sequence is used as the reference sequence for that "
            "marker when generating allele names. Deviations from the reference are expressed as "
            "variants.")
        ini.add_section("suffix")
        ini.add_comment(
            "suffix",
            "For explicitly-configured STR markers: Specify the suffix sequence of each STR "
            "marker. The suffix is the sequence between the repeat and the right flank. The "
            "sequence is used as the reference sequence for that marker when generating allele "
            "names.")
        ini.add_section("repeat")
        ini.add_comment(
            "repeat",
            "For explicitly-configured STR markers: Specify the repeat structure of each STR "
            "marker in space-separated triples of sequence, minimum number of repeats, and "
            "maximum number of repeats.")
        ini.add_section("length_adjust")
        ini.add_comment(
            "length_adjust",
            "For explicitly-configured STR markers: To prevent discrepancies between traditional "
            "CE allele numbers and the CE number in FDSTools allele names, the CE allele number "
            "as calculated by FDSTools is based on the length of the repeat sequence minus the "
            "adjustment specified here.")
        ini.add_section("block_length")
        ini.add_comment(
            "block_length",
            "For explicitly-configured STR markers: Specify the repeat unit length of each STR "
            "marker. By default, the length of the repeat unit of the longest repeat is used."
        )

    # Section that only applies to explicitly-configured non-STR markers.
    if has_explicit_non_str:
        ini.add_section("no_repeat")
        ini.add_comment(
            "no_repeat",
            "For explicitly-configured non-STR markers: Specify the reference sequence for each "
            "non-STR marker.")
    return ini