Example #1
def calculate_cs219(threads, a3m_db_base, cs_db_base):
  hhlib_environment = os.environ.get('HHLIB')
  if hhlib_environment:
    #TODO: check
    check_call(" ".join(["cstranslate", "-A ", os.path.join(hhlib_environment, "data/cs219.lib"), "-D", os.path.join(hhlib_environment, "data/context_data.lib"), "-x 0.3 -c 4 --ffindex", "-i", a3m_db_base, "-o", cs_db_base, "-I a3m -b"]), env=dict(os.environ, OMP_NUM_THREADS=threads), shell=True)
  else:
    sys.stderr.write("ERROR: HHLIB environment variable not set! See manual!\n")
    sys.exit(1)
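
A minimal usage sketch, with placeholder FFindex database basenames; the thread count is given as a string because it is placed into the subprocess environment, whose values must be strings:

# Hypothetical call with placeholder database basenames.
calculate_cs219("4", "mydb_a3m", "mydb_cs219")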
Example #2
def MatchLabels(column_labels, row_labels):

    if len(column_labels) != len(row_labels):
        sys.err("ERROR 1st matrix column count " + str(len(column_labels)) +
                " not equal 2nd Matrix number row count " +
                str(len(row_labels)) + "\n")
    else:
        cnt = 0
        for k in range(0, len(column_labels)):
            if column_labels[k] != row_labels[k] and cnt < 20:
                cnt += 1
                #sys.err("ERROR At column & row position "+str(k)+" Matrix 1 column value "+str(column_labels)+" not equal 2nd Matrix row value "+str(row_labels)+"\n" )

        if cnt > 0:
            sys.exit(-11)
Example #3
 def make_data_for_clustering(self, option, filter=False, sub_topics=False):
     documents = []
     targets = []
     if not sub_topics:
         topic_nums = range(1, 46)
         # get all files in all topics, labelled by topic number
         for i in topic_nums:
             if i not in [15, 17]:
                 # get all files in topic i
                 for f in self.get_topic(topic=i):
                     if option == 'doc_template':
                         # this returns a list, so converting to string
                         documents.append(' '.join(
                             self.get_text(f, element_type='doc_template')))
                         targets.append(str(i))
                     elif option == 'text':
                         documents.append(' '.join(
                             self.get_text(f,
                                           element_type='text',
                                           filter=filter)))
                         targets.append(str(i))
                     else:
                         print('unknown option')
                         sys.exit(-1)
     else:
         topic_nums = range(1, 46)
         # get all files in all topics, partitioned by subtopic
         for i in topic_nums:
             if i not in [15, 17]:
                 # get all files in topic i
                 for f in self.get_topic(topic=i):
                     if option == 'doc_template':
                         #this returns a list, so converting to string
                         top = self.get_topic_num(f)
                         documents.append(' '.join(
                             self.get_text(f, element_type='doc_template')))
                         targets.append(top[0] + "_" + top[1])
                     elif option == 'text':
                         top = self.get_topic_num(f)
                         documents.append(' '.join(
                             self.get_text(f,
                                           element_type='text',
                                           filter=filter)))
                         targets.append(top[0] + "_" + top[1])
                     else:
                         print('unknown option')
                         sys.exit(-1)
     return (documents, targets)
Example #4
def processTif(infile, outpre, polyC, rows, lines, whiteLevel, debug):
    """
     Read a tif file containing uint16 data of "I" image data
     and apply the correction given by the polynomial(s) in polyC.
     White level "whiteLevel" must be provided.
     Have to convert to float format for correction and return to uint16
     afterwards. A correction table in the reconstruction process would
     be more efficient.
     Results output written to outfile in same tif format.
    """
    try:
        #from PIL import Image
        import tifffile as tf
    except ImportError:
        #print("Import PIL failed")
        print("Import tifffile failed")
        sys.exit(1)
    #im = Image.open(infile)
    imArr = tf.imread(infile)
    if len(imArr[0, :]) != lines or len(imArr[:, 0]) != rows:
        print("* Error: tiff image size is ", len(imArr[0, :]), " by ",
              len(imArr[:, 0]))
        print("Must set lines and rows the same")
        sys.exit(1)
    #imArr = np.array(im)
    if whiteLevel == 0:
        whiteLevel = imArr.max()
        if debug:
            print("Using whiteLevel=", whiteLevel)
    #
    imLn = np.log(float(whiteLevel) / imArr)

    for l in range(lines):
        ln = 0
        if len(polyC[:, 0]) == 1:
            ln = 0
        elif len(polyC[:, 0]) == lines:
            ln = l
        imLn[l, :] = correct(imLn[l, :], polyC[ln, ::-1])
    imCor = whiteLevel / np.exp(imLn)
    imCor16 = np.array(imCor, dtype='uint16')
    if debug:
        print("imCor16: ", imCor16[0:2, 0:2])
    #imOut = Image.fromarray(imCor16)
    #print("infile=",infile) #," outpre=",outpre)
    outfile = outpre + infile
    #imOut.save(outfile)
    tf.imsave(outfile, imCor16)
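
The correct() helper called in the line loop is not shown in this example. A minimal sketch of what it might do, assuming polyC stores coefficients lowest-degree first (which would explain the polyC[ln, ::-1] reversal) so that the reversed row is ready for np.polyval:

import numpy as np

def correct(values, coeffs_desc):
    # Hypothetical stand-in for the missing helper: evaluate the correction
    # polynomial at each value; coeffs_desc is assumed highest-degree first.
    return np.polyval(coeffs_desc, values)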
Example #5
def calculate_cs219(threads, a3m_db_base, cs_db_base):
    hhlib_environment = os.environ.get('HHLIB')
    if hhlib_environment:
        #TODO: check
        check_call(" ".join([
            "cstranslate", "-A ",
            os.path.join(hhlib_environment, "data/cs219.lib"), "-D",
            os.path.join(hhlib_environment,
                         "data/context_data.lib"), "-x 0.3 -c 4 --ffindex",
            "-i", a3m_db_base, "-o", cs_db_base, "-I a3m -b"
        ]),
                   env=dict(os.environ, OMP_NUM_THREADS=threads),
                   shell=True)
    else:
        sys.err("ERROR: HHLIB environment variable not set! See manual!\n")
        exit(1)
Example #6
 def __init__(self, installDir, toolDir):
     self.installDir, self.toolDir = installDir, toolDir
     link = input("Git repository of the tool (full link): ")
     name = input("Tool name: ")
     ver = input("Python version: ")
     exe = input("Name of the file to launch (w/o extension): ")
     cmds = input("Custom command (leave blank if unsure): ")
     issudo = input(
         "Does the package need root permissions? [y/N]: ").lower()
     # Add a question if the script has a different name,
     # e.g. main.py instead of <projectname>.py
     temp = 0
     if not cmds:
         if issudo != "y":
             cmds = "python{0} {1}{2}/{3}.py".format(
                 ver, self.toolDir, name, exe)
         else:
             cmds = "sudo python{0} {1}{2}/{3}.py".format(
                 ver, self.toolDir, name, exe)
     try:
         shell("git clone %s %s%s" % (link, self.toolDir, name))
         temp = 1
     except:
         temp = -1
         ErrorHandler(err(), False)
     if temp == 1:
         dictmgr.addWords(self.installDir, [name])
         dictmgr.addCustomWords(self.installDir, name)
         dictmgr.updateConfig(self.installDir, name, cmds)
         print(
             "[*] - You may need to restart onifw in order to use the custom tool."
         )
Example #7
def getHomVarInfo(conn, affected, parents) :

    myquery = ""
    if len(parents) == 2 : 
        print "Performing Two Parent Query"

        myquery = TWO_PARENT_SHARED_HOM_QUERY
        myquery = myquery.replace("$$A$$", str(affected))
        myquery = myquery.replace("$$P1$$", str(parents[0]))
        myquery = myquery.replace("$$P2$$", str(parents[1]))
        #print myquery
    elif len(parents) == 1 :
        print "Performing Single Parent Query"

        myquery = SINGLE_PARENT_SHARED_HOM_QUERY
        myquery = myquery.replace("$$A$$", str(affected))
        myquery = myquery.replace("$$P1$$", str(parents[0]))
    #elif len(parents) == 0 :
    #    print "Performing Affected Het Query"

    #    myquery = AFFECTED_HET_QUERY
    #    myquery = myquery.replace("$$A$$", str(affected))
    else :
        sys.err("What kind of family is this?" )
        sys.exit(1)

    # Time the Query
    start = time()

    variantdata = []
    index = 0
    for row in conn.query( myquery ):
        #print index,":",row
        variantdata.append( "%s:%s" % (row[1], row[2]) )
        index += 1

    # Print Run Time
    elapsed = (time() - start)
    print "Elapsed:",elapsed
    return variantdata
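
The query templates themselves are defined elsewhere; only the $$A$$/$$P1$$/$$P2$$ placeholder convention, and that row[1]:row[2] forms the variant key, can be inferred from the code above. A hypothetical template might look like:

# Hypothetical template; the real SQL lives elsewhere in the project.
# Only the placeholder names substituted by getHomVarInfo() are known.
SINGLE_PARENT_SHARED_HOM_QUERY = """
SELECT v.variant_id, v.chrom, v.pos
FROM variants v
JOIN variants p ON p.chrom = v.chrom AND p.pos = v.pos
WHERE v.sample_id = $$A$$ AND p.sample_id = $$P1$$
  AND v.genotype = 'hom_alt' AND p.genotype = 'hom_alt'
"""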
Example #8
def addCustomWords(installDir, name):
    """Add words to the custom dictionnary
    Arguments:
        - name : name of the custom tool
        - installDir : Directory of current install
    """
    print("[*] - Adding custom words...")
    try:
        with open("{}data/ctools.txt".format(installDir), "a") as f:
            f.write(name + "\n")
        print("[*] - Done.")
    except:
        ErrorHandler(err(), False)
Example #9
def addWords(installDir, wordList):
    """Add words to dictionnary file
    
    Arguments:
        - wordList : Array of strings containing words to add
    """
    print("[*] - Adding dictionnary words...")
    try:
        with open("{}data/dict.txt".format(installDir), "a") as f:
            for i in range(len(wordList)):
                f.write(wordList[i] + "\n")
        f.close()
        print("[*] - Done.")
    except:
        ErrorHandler(err(), False)
Example #10
 def __init__(self,
              tool_name,
              lang="",
              need_args=False,
              change_dir=False,
              exe_name="",
              pre_cmd="",
              post_cmd="",
              sudo=False):
     self.tool_name = tool_name
     self.lang = lang
     self.installDir = toolDir + self.tool_name
     self.prefix = self.lang
     self.arg = ""
     if not isdir(self.installDir):
         print(
             color.IMPORTANT +
             "[!] - Tool not installed or not located in the tool/ directory"
             + color.END)
         return
     self.extension = ""
     for i in extensions:
         if i in self.lang:
             self.extension = extensions[i]
     if need_args:
         print(color.LOGGING + "[?] - Please specify a target" + color.END)
         self.arg = str(
             input(color.HEADER + "onifw[{}]: ".format(self.tool_name) +
                   color.END))
     if len(pre_cmd) > 1:
         self.extension += " " + pre_cmd
     if len(post_cmd) > 1:
         self.arg += " " + post_cmd
     if len(exe_name) > 1:
         self.exe_name = exe_name
     else:
         self.exe_name = self.tool_name
     if sudo:
         self.prefix = "sudo " + self.prefix
     self.cmd = "{0} {1}/{2}{3}".format(self.prefix, self.installDir,
                                        self.exe_name, self.extension)
     if change_dir:
         self.cmd = "cd {0} && {1}".format(self.installDir, self.cmd)
     if self.arg:
         self.cmd += " " + self.arg
     try:
         system(self.cmd)
     except:
         ErrorHandler(err(), False)
Example #11
def updateConfig(installDir, name, command):
    """Add launch command to the onirc file
    Arguments:
        - installDir : directory of current install
        - name : name of the tool
        - command : command used to launch the tool
    """
    print("[*] - Updating configuration...")
    try:
        with open("{}onirc".format(installDir), "a") as f:
            f.write("{0} = {1}\n".format(name, command))
        print("[*] - Done.")
    except:
        print(color.LOGGING + "[!] - Unexpected error: ")
        ErrorHandler(err(), False)
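
updateConfig() appends one "name = command" line per tool. A hypothetical call, using placeholder paths, and the onirc line it would produce:

# Hypothetical call; with the format string above it appends the line
#   sqlmap = python3 /opt/onifw/tools/sqlmap/sqlmap.py
# to /opt/onifw/onirc.
updateConfig("/opt/onifw/", "sqlmap", "python3 /opt/onifw/tools/sqlmap/sqlmap.py")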
Example #12
    def __init__(self, installDir, toolDir):
        self.installDir, self.toolDir = installDir, toolDir
        link = input("Git repository of the tool (full link): ")
        name = input("Tool name: ")
        nb_cmd = int(input("How many commands to build the tool?: "))
        try:
            shell("git clone %s %s %s" % (link, self.toolDir, name))
            for i in range(nb_cmd):
                print("[*] - Current directory: %s" % shell("pwd"))
                cmd = input("Custom command: ")
                shell(cmd)
            cmds = input("Launch command: ")
            dictmgr.addWords(self.installDir, [name])
            dictmgr.addCustomWords(self.installDir, name)
            dictmgr.updateConfig(self.installDir, name, cmds)

        except:
            ErrorHandler(err(), False)
Example #13
 def __init__(self, installDir, toolDir):
     lang_dict = {
         "perl": "perl",
         "ruby": "ruby",
         "go": "go",
         "java-jar": "jar",
         "java": "java",
     }
     self.installDir, self.toolDir = installDir, toolDir
     print(color.OKBLUE + "Available languages:")
     for i in lang_dict.keys():
         print(i)
     print(color.END)
     lang = input("Select lang: ")
     link = input("Git repository of the tool (full link): ")
     name = input("Tool name: ")
     name_exe = input("Name of the main file (w/ extension): ")
     nb_cmd = int(input("How many commands to build the tool?: "))
     try:
         shell("git clone %s %s%s" % (link, self.toolDir, name))
         for i in range(nb_cmd):
             print("[*] - Current directory: %s" % shell("pwd"))
             cmd = input("Custom command: ")
             shell(cmd)
         if lang == "java":
             cmds = "{0} = cd {1}{2} && {3}{4}".format(
                 name, toolDir, name, lang_dict[lang], name_exe)
         else:
             cmds = "{0} = {1} {2}{3}{4}".format(name, lang_dict[lang],
                                                 toolDir, name, name_exe)
         with open("{}onirc".format(self.installDir), "a") as f:
             f.write("{0} = {1}\n".format(name, cmds))
             f.close()
         with open("{}data/dict.txt".format(self.installDir), "a") as f:
             f.write(name + '\n')
             f.close()
         with open("{}data/ctools.txt".format(self.installDir), "a") as f:
             f.write(name + '\n')
             f.close()
     except:
         ErrorHandler(err(), False)
Example #14
    def __init__(self, installDir):
        self.installDir = installDir
        if not self.check_branch():
            try:
                with open("{}data/version.txt".format(installDir)) as f:
                    local_version = version.parse(
                        f.readlines()[0].rstrip("\n\r"))

                latest_version = check_output(
                    "curl -s https://raw.githubusercontent.com/w0bos/onifw/master/src/data/version.txt",
                    shell=True).decode("utf-8").strip('\r\n')
                late = version.parse(latest_version)
                if late > local_version:
                    ans = input(
                        color.NOTICE +
                        "[*] - A new version is available\nDo you wish to install the new update? [y/N] :"
                        + color.END)
                    if ans.lower() in ["yes", "y"]:
                        # Won't wipe old install
                        shell("cd {} && git pull".format(installDir))
                    else:
                        print("[*] - Update aborted")

                elif late == local_version:
                    print(
                        color.OKGREEN +
                        "[*] - You're already running the latest version of onifw"
                        + color.END)
                elif late < local_version:
                    print(color.BOLD + color.IMPORTANT +
                          "[+] - You are running an alpha version of onifw" +
                          color.END)
                else:
                    print(color.WARNING + "[!] - Unknown error" + color.END)

                shell("rm -rf {}/temp".format(installDir))
            except:
                ErrorHandler(err(), False, True)
Example #15
def restoreDict(installDir):
    """Restore default dictionnary file
    """
    print("[*] - Restoring dictionnary to default...")
    try:
        f = open("{}data/dict.txt".format(installDir))
        out = []
        default = False
        for line in f:
            temp = str(line).rstrip('\n\r')
            if temp == "update":
                default = True
                out.append(temp)
            if not default:
                out.append(line)
        f.close()

        f = open("{}data/dict.txt".format(installDir), 'w')
        out.append("\n")
        f.writelines(out)
        f.close()
    except:
        ErrorHandler(err(), False)
Example #16
def write_circos_table(data,
                       name=None,
                       rowheader=None,
                       colheader=None,
                       prefix="label",
                       corner=None,
                       delimiter='\t'):
    '''
    Write a matrix of data to a tab-delimited file.

    input:
    data: a 2-dimensional array of data
    name: path and name of the file to save
    rowheader
    colheader

    output:
    a tab-delimited file

    '''
    f = open(name, 'w')

    # write order header
    f.write("Data")
    f.write(delimiter)
    f.write("Data")
    f.write(delimiter)
    for i in range(len(data[0])):
        f.write(str(i + 1))
        if i < len(data[0]) - 1:
            f.write(delimiter)
    f.write('\n')
    # column numbers as header
    f.write("Data")
    f.write(delimiter)
    if len(colheader) == 0:
        f.write("Data")
        f.write(delimiter)
        for i in range(len(data[0])):
            f.write(str(i))
            if i < len(data[0]) - 1:
                f.write(delimiter)
        f.write('\n')
    elif len(colheader) == len(data[0]):
        f.write("Data")
        f.write(delimiter)
        for i in range(len(data[0][:])):
            f.write(colheader[i])
            if i < len(data[0]) - 1:
                f.write(delimiter)
        f.write('\n')
    else:
        sys.err("The lable list in not matched with the data size")
        sys.exit()

    for i in range(len(data)):
        if len(rowheader) == 0:
            f.write(str(i + len(data[0])))
            f.write(delimiter)
            f.write(prefix + str(i))
            f.write(delimiter)
        elif len(rowheader) == len(data):
            f.write(str(i + len(data[0]) + 1))
            f.write(delimiter)
            f.write(rowheader[i])
            f.write(delimiter)
        else:
            sys.stderr.write("The label list does not match the data size\n")
            sys.exit(1)

        for j in range(len(data[i])):
            f.write(str(data[i][j]))
            if j < len(data[i]) - 1:
                f.write(delimiter)
        f.write('\n')
    f.close()
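
A minimal usage sketch for write_circos_table(), with placeholder labels and file name:

# Hypothetical call: writes a 2x3 matrix with row and column labels to a
# tab-delimited file in the layout expected by Circos table tools.
data = [[10, 20, 30], [40, 50, 60]]
write_circos_table(data,
                   name="circos_table.txt",
                   rowheader=["sampleA", "sampleB"],
                   colheader=["geneX", "geneY", "geneZ"])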
Example #17
    def process(self, refDB):
        """
        process reads against a reference fasta file
        """
        try:
            bc = self.orig_bc_lines[0].split(":")[0]
            mapped_pairs_count = 0
            remapped_pairs_count = 0
            mapped_singles_count = 0
            remapped_singles_count = 0
            secondary_alignment = 0
            count = 0
            PE1 = {}
            PE2 = {}

            for line in self.orig_bc_lines:
                # 0x1 template having multiple segments in sequencing
                # 0x2 each segment properly aligned according to the aligner
                # 0x4 segment unmapped
                # 0x8 next segment in the template unmapped
                # 0x10 SEQ being reverse complemented
                # 0x20 SEQ of the next segment in the template being reversed
                # 0x40 the first segment in the template
                # 0x80 the last segment in the template
                # 0x100 secondary alignment
                # 0x200 not passing quality controls
                # 0x400 PCR or optical duplicate
                lbc = line.split(":")[0]
                if lbc != bc:
                    sys.stderr.write(
                        "Something went wrong, more than one barcode in process barcodes\n"
                    )

                count += 1
                line2 = line.strip().split()
                flag = int(line2[1])
                # Secondary alignment
                if (
                        flag & 0x100
                ):  # not sure what to do with secondary alignment yet, for now ignore
                    secondary_alignment += 1
                    continue

                mq = int(line2[4])
                #                if mq < self.minMQ:
                #                    # add to those that need to be remapped
                #                    self.remap_lines.append(line)
                # Handle SE:
                # mapped SE reads have 0x1 set to 0, and 0x4 (third bit) set to 1
                if not (
                        flag & 0x1
                ):  # SE READ, shouldn't see singles, maybe handle them later
                    # if not (flag & 0x4 and mq >= self.minMQ):  # MAPPED
                    #     self.ok_bc_lines.append(line)
                    #     # TODO: NEED to determine read cloud for read
                    #     mapped_singles_count += 1
                    # else:  # UNMAPPED or Poor minMQ, remap the read
                    #     if (flag & 0x10):  # reverse complement
                    #         line2[9] = reverseComplement(line2[9])
                    #         line2[10] = reverse(line2[10])
                    #     self.addRead(['\n'.join(['@' + line2[0] + ' 1:N:O', line2[9], '+', line2[10]])])
                    #     remapped_singles_count += 1
                    continue
                # Handle PE:
                # logic:  0x1 = multiple segments in sequencing,   0x4 = segment unmapped,  0x8 = next segment unmapped
                if (flag & 0x1):  # PE READ
                    if (not (flag & 0x4)
                            and not (flag & 0x8)):  # both pairs mapped
                        if (flag & 0x40
                            ):  # is this PE1 (first segment in template)
                            # PE1 read, check that PE2 is in dict
                            ID = line2[0]
                            if ID in PE2:
                                if mq >= self.minMQ and int(
                                        PE2[ID].strip().split()[4]
                                ) >= self.minMQ:  # check MQ of both reads
                                    self.ok_bc_lines.append(line)
                                    self.ok_bc_lines.append(PE2[ID])
                                    del PE2[ID]
                                    # TODO: NEED to determine read cloud for read
                                    mapped_pairs_count += 1
                                else:
                                    if (flag & 0x10):  # reverse complement
                                        line2[9] = reverseComplement(line2[9])
                                        line2[10] = reverse(line2[10])
                                    r1 = '\n'.join([
                                        '@' + line2[0] + ' 1:N:O', line2[9],
                                        '+', line2[10]
                                    ])  # sequence + qual
                                    rl2 = PE2[ID].strip().split()
                                    if (int(rl2[1])
                                            & 0x10):  # reverse complement
                                        rl2[9] = reverseComplement(rl2[9])
                                        rl2[10] = reverse(rl2[10])
                                    r2 = '\n'.join([
                                        '@' + rl2[0] + ' 2:N:O', rl2[9], '+',
                                        rl2[10]
                                    ])  # sequence + qual
                                    self.addRead('\n'.join([r1, r2]))
                                    del PE2[ID]
                                    remapped_pairs_count += 1
                            else:
                                PE1[ID] = line
                        elif (flag &
                              0x80):  # is this PE2 (last segment in template)
                            # PE2 read, check that PE1 is in dict and write out
                            ID = line2[0]
                            if ID in PE1:
                                if mq >= self.minMQ and int(
                                        PE1[ID].strip().split()[4]
                                ) >= self.minMQ:  # check MQ of both reads
                                    self.ok_bc_lines.append(line)
                                    self.ok_bc_lines.append(PE1[ID])
                                    del PE1[ID]
                                    # TODO: NEED to determine read cloud for read
                                    mapped_pairs_count += 1
                                else:
                                    if (flag & 0x10):  # reverse complement
                                        line2[9] = reverseComplement(line2[9])
                                        line2[10] = reverse(line2[10])
                                    r2 = '\n'.join([
                                        '@' + line2[0] + ' 2:N:O', line2[9],
                                        '+', line2[10]
                                    ])  # sequence + qual
                                    rl1 = PE1[ID].strip().split()
                                    if (int(rl1[1])
                                            & 0x10):  # reverse complement
                                        rl1[9] = reverseComplement(rl1[9])
                                        rl1[10] = reverse(rl1[10])
                                    r1 = '\n'.join([
                                        '@' + rl1[0] + ' 1:N:O', rl1[9], '+',
                                        rl1[10]
                                    ])  # sequence + qual
                                    self.addRead('\n'.join([r1, r2]))
                                    del PE1[ID]
                                    remapped_pairs_count += 1
                            else:
                                PE2[ID] = line
                    else:  # an 'unmapped' pair, at least 1 unmapped
                        if (flag & 0x40
                            ):  # is this PE1 (first segment in template)
                            # PE1 read, check that PE2 is in dict and write out
                            ID = line2[0]
                            if ID in PE2:
                                if (flag & 0x10):  # reverse complement
                                    line2[9] = reverseComplement(line2[9])
                                    line2[10] = reverse(line2[10])
                                r1 = '\n'.join([
                                    '@' + line2[0] + ' 1:N:O', line2[9], '+',
                                    line2[10]
                                ])  # sequence + qual
                                rl2 = PE2[ID].strip().split()
                                if (int(rl2[1]) & 0x10):  # reverse complement
                                    rl2[9] = reverseComplement(rl2[9])
                                    rl2[10] = reverse(rl2[10])
                                r2 = '\n'.join([
                                    '@' + rl2[0] + ' 2:N:O', rl2[9], '+',
                                    rl2[10]
                                ])  # sequence + qual
                                self.addRead('\n'.join([r1, r2]))
                                del PE2[ID]
                                remapped_pairs_count += 1
                            else:
                                PE1[ID] = line
                        elif (flag &
                              0x80):  # is this PE2 (last segment in template)
                            # PE2 read, check that PE1 is in dict and write out
                            ID = line2[0]
                            if ID in PE1:
                                if (flag & 0x10):  # reverse complement
                                    line2[9] = reverseComplement(line2[9])
                                    line2[10] = reverse(line2[10])
                                r2 = '\n'.join([
                                    '@' + line2[0] + ' 2:N:O', line2[9], '+',
                                    line2[10]
                                ])  # sequence + qual
                                rl1 = PE1[ID].strip().split()
                                if (int(rl1[1]) & 0x10):  # reverse complement
                                    rl1[9] = reverseComplement(rl1[9])
                                    rl1[10] = reverse(rl1[10])
                                r1 = '\n'.join([
                                    '@' + rl1[0] + ' 1:N:O', rl1[9], '+',
                                    rl1[10]
                                ])  # sequence + qual
                                self.addRead('\n'.join([r1, r2]))
                                del PE1[ID]
                                remapped_pairs_count += 1
                            else:
                                PE2[ID] = line

        except (KeyboardInterrupt, SystemExit):
            sys.stderr.write("MAPPING\tERROR\t%s unexpectedly terminated\n" %
                             (__name__))
            return 1
        except:
            sys.stderr.write("".join(
                traceback.format_exception(*sys.exc_info())))
            return 1
Example #18
def extract_folds(depth_file, min_fold_size=50, tiny_depth=0.001, save_file=False):
    """
    Use depth to extract folds from a triangular surface mesh.

    Steps ::
        1. Compute histogram of depth measures.
        2. Define a depth threshold and find the deepest vertices.
        3. Segment deep vertices as an initial set of folds.
        4. Remove small folds.
        5. Find and fill holes in the folds.
        6. Renumber folds.

    Step 2 ::
        To extract an initial set of deep vertices from the surface mesh,
        we anticipate that there will be a rapidly decreasing distribution
        of low depth values (on the outer surface) with a long tail
        of higher depth values (in the folds), so we smooth the histogram's
        bin values, convolve to compute slopes, and find the depth value
        for the first bin with slope = 0. This is our threshold.

    Step 5 ::
        The folds could have holes in areas shallower than the depth threshold.
        Calling fill_holes() could accidentally include very shallow areas
        (in an annulus-shaped fold, for example), so we call fill_holes() with
        the argument exclude_range set close to zero to retain these areas.

    Parameters
    ----------
    depth_file : string
        surface mesh file in VTK format with faces and depth scalar values
    min_fold_size : integer
        minimum fold size (number of vertices)
    tiny_depth : float
        largest non-zero depth value that will stop a hole from being filled
    save_file : Boolean
        save output VTK file?

    Returns
    -------
    folds : list of integers
        fold numbers for all vertices (-1 for non-fold vertices)
    n_folds :  int
        number of folds
    depth_threshold :  float
        threshold defining the minimum depth for vertices to be in a fold
    bins :  list of integers
        histogram bins: each is the number of vertices within a range of depth values
    bin_edges :  list of floats
        histogram bin edge values defining the bin ranges of depth values
    folds_file : string (if save_file)
        name of output VTK file with fold IDs (-1 for non-fold vertices)

    Examples
    --------
    >>> import os
    >>> import numpy as np
    >>> import pylab
    >>> from scipy.ndimage.filters import gaussian_filter1d
    >>> from mindboggle.utils.io_vtk import read_scalars
    >>> from mindboggle.utils.mesh import find_neighbors_from_file
    >>> from mindboggle.utils.plots import plot_vtk
    >>> from mindboggle.features.folds import extract_folds
    >>> path = os.environ['MINDBOGGLE_DATA']
    >>> depth_file = os.path.join(path, 'arno', 'shapes', 'lh.pial.travel_depth.vtk')
    >>> neighbor_lists = find_neighbors_from_file(depth_file)
    >>> min_fold_size = 50
    >>> tiny_depth = 0.001
    >>> save_file = True
    >>> #
    >>> folds, n_folds, thr, bins, bin_edges, folds_file = extract_folds(depth_file,
    >>>     min_fold_size, tiny_depth, save_file)
    >>> #
    >>> # View folds:
    >>> plot_vtk('folds.vtk')
    >>> # Plot histogram and depth threshold:
    >>> depths, name = read_scalars(depth_file)
    >>> nbins = np.round(len(depths) / 100.0)
    >>> a,b,c = pylab.hist(depths, bins=nbins)
    >>> pylab.plot(thr*np.ones((100,1)), np.linspace(0, max(bins), 100), 'r.')
    >>> pylab.show()
    >>> # Plot smoothed histogram:
    >>> bins_smooth = gaussian_filter1d(bins.tolist(), 5)
    >>> pylab.plot(range(len(bins)), bins, '.', range(len(bins)), bins_smooth,'-')
    >>> pylab.show()

    """
    import os
    import sys
    import numpy as np
    from time import time
    from scipy.ndimage.filters import gaussian_filter1d
    from mindboggle.utils.io_vtk import rewrite_scalars, read_vtk
    from mindboggle.utils.mesh import find_neighbors
    from mindboggle.utils.morph import fill_holes
    from mindboggle.utils.segment import segment

    do_fill_holes = True

    print("Extract folds in surface mesh")
    t0 = time()

    #-------------------------------------------------------------------------
    # Load depth values for all vertices
    #-------------------------------------------------------------------------
    faces, lines, indices, points, npoints, depths, name, input_vtk = read_vtk(depth_file,
        return_first=True, return_array=True)

    #-------------------------------------------------------------------------
    # Find neighbors for each vertex
    #-------------------------------------------------------------------------
    neighbor_lists = find_neighbors(faces, npoints)

    #-------------------------------------------------------------------------
    # Compute histogram of depth measures
    #-------------------------------------------------------------------------
    min_vertices = 10000
    if npoints > min_vertices:
        nbins = int(np.round(npoints / 100.0))
    else:
        sys.exit("  Expecting at least {0} vertices to create depth histogram".
                 format(min_vertices))
    bins, bin_edges = np.histogram(depths, bins=nbins)

    #-------------------------------------------------------------------------
    # Anticipating that there will be a rapidly decreasing distribution
    # of low depth values (on the outer surface) with a long tail of higher
    # depth values (in the folds), smooth the bin values (Gaussian), convolve
    # to compute slopes, and find the depth for the first bin with slope = 0.
    #-------------------------------------------------------------------------
    bins_smooth = gaussian_filter1d(bins.tolist(), 5)
    window = [-1, 0, 1]
    bin_slopes = np.convolve(bins_smooth, window, mode='same') / (len(window) - 1)
    ibins0 = np.where(bin_slopes == 0)[0]
    if ibins0.shape:
        depth_threshold = bin_edges[ibins0[0]]
    else:
        depth_threshold = np.median(depths)

    #-------------------------------------------------------------------------
    # Find the deepest vertices
    #-------------------------------------------------------------------------
    indices_deep = [i for i,x in enumerate(depths) if x >= depth_threshold]
    if indices_deep:

        #---------------------------------------------------------------------
        # Segment deep vertices as an initial set of folds
        #---------------------------------------------------------------------
        print("  Segment vertices deeper than {0:.2f} as folds".format(depth_threshold))
        t1 = time()
        folds = segment(indices_deep, neighbor_lists)
        # Slightly slower alternative -- fill boundaries:
        #regions = -1 * np.ones(len(points))
        #regions[indices_deep] = 1
        #folds = segment_by_filling_borders(regions, neighbor_lists)
        print('  ...Segmented folds ({0:.2f} seconds)'.format(time() - t1))

        #---------------------------------------------------------------------
        # Remove small folds
        #---------------------------------------------------------------------
        if min_fold_size > 1:
            print('  Remove folds smaller than {0}'.format(min_fold_size))
            unique_folds = [x for x in np.unique(folds) if x > -1]
            for nfold in unique_folds:
                indices_fold = [i for i,x in enumerate(folds) if x == nfold]
                if len(indices_fold) < min_fold_size:
                    folds[indices_fold] = -1

        #---------------------------------------------------------------------
        # Find and fill holes in the folds
        # Note: Surfaces surrounded by folds can be mistaken for holes,
        #       so exclude_range includes outer surface values close to zero.
        #---------------------------------------------------------------------
        if do_fill_holes:
            print("  Find and fill holes in the folds")
            folds = fill_holes(folds, neighbor_lists, values=depths,
                               exclude_range=[0, tiny_depth])

        #---------------------------------------------------------------------
        # Renumber folds so they are sequential
        #---------------------------------------------------------------------
        renumber_folds = -1 * np.ones(len(folds))
        fold_numbers = [int(x) for x in np.unique(folds) if x > -1]
        for i_fold, n_fold in enumerate(fold_numbers):
            fold = [i for i,x in enumerate(folds) if x == n_fold]
            renumber_folds[fold] = i_fold
        folds = renumber_folds
        n_folds = i_fold + 1

        # Print statement
        print('  ...Extracted {0} folds ({1:.2f} seconds)'.
              format(n_folds, time() - t0))
    else:
        print('  No deep vertices')

    #-------------------------------------------------------------------------
    # Return folds, number of folds, file name
    #-------------------------------------------------------------------------
    if save_file:

        folds_file = os.path.join(os.getcwd(), 'folds.vtk')
        rewrite_scalars(depth_file, folds_file, folds, 'folds', folds)

        if not os.path.exists(folds_file):
            raise(IOError(folds_file + " not found"))

    else:
        folds_file = None

    return folds.tolist(), n_folds, depth_threshold, bins, bin_edges, folds_file
Example #19
def extract_folds(depth_file,
                  min_vertices=10000,
                  min_fold_size=50,
                  do_fill_holes=False,
                  min_hole_depth=0.001,
                  save_file=False):
    """
    Use depth to extract folds from a triangular surface mesh.

    Steps ::
        1. Compute histogram of depth measures.
        2. Define a depth threshold and find the deepest vertices.
        3. Segment deep vertices as an initial set of folds.
        4. Remove small folds.
        5. Find and fill holes in the folds (optional).
        6. Renumber folds.

    Step 2 ::
        To extract an initial set of deep vertices from the surface mesh,
        we anticipate that there will be a rapidly decreasing distribution
        of low depth values (on the outer surface) with a long tail
        of higher depth values (in the folds), so we smooth the histogram's
        bin values, convolve to compute slopes, and find the depth value
        for the first bin with slope = 0. This is our threshold.

    Step 5 ::
        The folds could have holes in areas shallower than the depth threshold.
        Calling fill_holes() could accidentally include very shallow areas
        (in an annulus-shaped fold, for example), so we include the argument
        exclude_range to check for any values from zero to min_hole_depth;
        holes are not filled if they contains values within this range.

    Parameters
    ----------
    depth_file : string
        surface mesh file in VTK format with faces and depth scalar values
    min_fold_size : integer
        minimum fold size (number of vertices)
    do_fill_holes : Boolean
        fill holes in the folds?
    min_hole_depth : float
        largest non-zero depth value that will stop a hole from being filled
    save_file : Boolean
        save output VTK file?

    Returns
    -------
    folds : list of integers
        fold numbers for all vertices (-1 for non-fold vertices)
    n_folds :  int
        number of folds
    depth_threshold :  float
        threshold defining the minimum depth for vertices to be in a fold
    bins :  list of integers
        histogram bins: each is the number of vertices within a range of depth values
    bin_edges :  list of floats
        histogram bin edge values defining the bin ranges of depth values
    folds_file : string (if save_file)
        name of output VTK file with fold IDs (-1 for non-fold vertices)

    Examples
    --------
    >>> import os
    >>> import numpy as np
    >>> import pylab
    >>> from scipy.ndimage.filters import gaussian_filter1d
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.mesh import find_neighbors_from_file
    >>> from mindboggle.mio.plots import plot_surfaces
    >>> from mindboggle.features.folds import extract_folds
    >>> path = os.environ['MINDBOGGLE_DATA']
    >>> depth_file = 'travel_depth.vtk' #os.path.join(path, 'arno', 'shapes', 'lh.pial.travel_depth.vtk')
    >>> neighbor_lists = find_neighbors_from_file(depth_file)
    >>> min_vertices = 10000
    >>> min_fold_size = 50
    >>> do_fill_holes = False #True
    >>> min_hole_depth = 0.001
    >>> save_file = True
    >>> #
    >>> folds, n_folds, thr, bins, bin_edges, folds_file = extract_folds(depth_file,
    >>>     min_vertices, min_fold_size, do_fill_holes, min_hole_depth, save_file)
    >>> #
    >>> # View folds:
    >>> plot_surfaces('folds.vtk')
    >>> # Plot histogram and depth threshold:
    >>> depths, name = read_scalars(depth_file)
    >>> nbins = np.round(len(depths) / 100.0)
    >>> a,b,c = pylab.hist(depths, bins=nbins)
    >>> pylab.plot(thr*np.ones((100,1)), np.linspace(0, max(bins), 100), 'r.')
    >>> pylab.show()
    >>> # Plot smoothed histogram:
    >>> bins_smooth = gaussian_filter1d(bins.tolist(), 5)
    >>> pylab.plot(range(len(bins)), bins, '.', range(len(bins)), bins_smooth,'-')
    >>> pylab.show()

    """
    import os
    import sys
    import numpy as np
    from time import time
    from scipy.ndimage.filters import gaussian_filter1d
    from mindboggle.mio.vtks import rewrite_scalars, read_vtk
    from mindboggle.guts.mesh import find_neighbors
    from mindboggle.guts.morph import fill_holes
    from mindboggle.guts.segment import segment

    print("Extract folds in surface mesh")
    t0 = time()

    #-------------------------------------------------------------------------
    # Load depth values for all vertices
    #-------------------------------------------------------------------------
    points, indices, lines, faces, depths, scalar_names, npoints, \
        input_vtk = read_vtk(depth_file, return_first=True, return_array=True)

    #-------------------------------------------------------------------------
    # Find neighbors for each vertex
    #-------------------------------------------------------------------------
    neighbor_lists = find_neighbors(faces, npoints)

    #-------------------------------------------------------------------------
    # Compute histogram of depth measures
    #-------------------------------------------------------------------------
    if npoints > min_vertices:
        nbins = int(np.round(npoints / 100.0))
    else:
        sys.exit("  Expecting at least {0} vertices to create depth histogram".
                 format(min_vertices))
    bins, bin_edges = np.histogram(depths, bins=nbins)

    #-------------------------------------------------------------------------
    # Anticipating that there will be a rapidly decreasing distribution
    # of low depth values (on the outer surface) with a long tail of higher
    # depth values (in the folds), smooth the bin values (Gaussian), convolve
    # to compute slopes, and find the depth for the first bin with slope = 0.
    #-------------------------------------------------------------------------
    bins_smooth = gaussian_filter1d(bins.tolist(), 5)
    window = [-1, 0, 1]
    bin_slopes = np.convolve(bins_smooth, window,
                             mode='same') / (len(window) - 1)
    ibins0 = np.where(bin_slopes == 0)[0]
    if ibins0.shape:
        depth_threshold = bin_edges[ibins0[0]]
    else:
        depth_threshold = np.median(depths)

    #-------------------------------------------------------------------------
    # Find the deepest vertices
    #-------------------------------------------------------------------------
    indices_deep = [i for i, x in enumerate(depths) if x >= depth_threshold]
    if indices_deep:

        #---------------------------------------------------------------------
        # Segment deep vertices as an initial set of folds
        #---------------------------------------------------------------------
        print("  Segment vertices deeper than {0:.2f} as folds".format(
            depth_threshold))
        t1 = time()
        folds = segment(indices_deep, neighbor_lists)
        # Slightly slower alternative -- fill boundaries:
        #regions = -1 * np.ones(len(points))
        #regions[indices_deep] = 1
        #folds = segment_by_filling_borders(regions, neighbor_lists)
        print('  ...Segmented folds ({0:.2f} seconds)'.format(time() - t1))

        #---------------------------------------------------------------------
        # Remove small folds
        #---------------------------------------------------------------------
        if min_fold_size > 1:
            print('  Remove folds smaller than {0}'.format(min_fold_size))
            unique_folds = [x for x in np.unique(folds) if x != -1]
            for nfold in unique_folds:
                indices_fold = [i for i, x in enumerate(folds) if x == nfold]
                if len(indices_fold) < min_fold_size:
                    folds[indices_fold] = -1

        #---------------------------------------------------------------------
        # Find and fill holes in the folds
        # Note: Surfaces surrounded by folds can be mistaken for holes,
        #       so exclude_range includes outer surface values close to zero.
        #---------------------------------------------------------------------
        if do_fill_holes:
            print("  Find and fill holes in the folds")
            folds = fill_holes(folds,
                               neighbor_lists,
                               values=depths,
                               exclude_range=[0, min_hole_depth])

        #---------------------------------------------------------------------
        # Renumber folds so they are sequential
        #---------------------------------------------------------------------
        renumber_folds = -1 * np.ones(len(folds))
        fold_numbers = [int(x) for x in np.unique(folds) if x != -1]
        for i_fold, n_fold in enumerate(fold_numbers):
            fold = [i for i, x in enumerate(folds) if x == n_fold]
            renumber_folds[fold] = i_fold
        folds = renumber_folds
        n_folds = i_fold + 1

        # Print statement
        print('  ...Extracted {0} folds ({1:.2f} seconds)'.format(
            n_folds,
            time() - t0))
    else:
        print('  No deep vertices')

    folds = [int(x) for x in folds]

    #-------------------------------------------------------------------------
    # Return folds, number of folds, file name
    #-------------------------------------------------------------------------
    if save_file:

        folds_file = os.path.join(os.getcwd(), 'folds.vtk')
        rewrite_scalars(depth_file, folds_file, folds, 'folds', folds)

        if not os.path.exists(folds_file):
            raise (IOError(folds_file + " not found"))

    else:
        folds_file = None

    return folds, n_folds, depth_threshold, bins, bin_edges, folds_file
Example #20
def create_trials(dict, data_h5, options):
    trial_id = data_h5["trialIds/trialIds"].value
    if options.verbose:
        print("\nCreating trials with ids: " + str(trial_id))
    trial_t = data_h5["trialStartTimes/trialStartTimes"].value
    # trial stop isn't stored. assume that it's twice the duration of other
    #   trials -- padding on the high side shouldn't matter
    ival = (trial_t[-1] - trial_t[0]) / (len(trial_t) - 1)
    trial_t = np.append(trial_t, trial_t[-1] + 2 * ival)

    if options.data_type == "ephys":
        good_trials = get_value_by_key(data_h5["/trialPropertiesHash"],
                                       "GoodTrials")
        ignore_ivals_start = [
            time for (time, good_trial) in zip(trial_t, good_trials)
            if good_trial == 0
        ]
        # trial stop isn't stored. assume that it's twice the duration of other
        #   trials -- padding on the high side shouldn't matter
        ignore_ivals_stop = [
            time for (time, good_trial) in zip(trial_t[1:], good_trials)
            if good_trial == 0
        ]
        ignore_intervals = [ignore_ivals_start, ignore_ivals_stop]

        keyName3 = "PhotostimulationType"
        hash_group_pointer2 = data_h5["/trialPropertiesHash"]
        stimulus_types = np.array(
            get_value_by_key(hash_group_pointer2, keyName3)).tolist()
        count_1 = stimulus_types.count(1)
        count_2 = stimulus_types.count(2)
    elif options.data_type == "ophys":
        plane_map = create_plane_map(data_h5, options)
        epoch_roi_list, epoch_roi_planes = create_trial_roi_map(
            data_h5, plane_map, options)
    for i in range(len(trial_id)):
        tid = trial_id[i]
        trial = "trial_%d%d%d" % (int(tid / 100), int(tid / 10) % 10, tid % 10)
        dict["epochs." + trial +
             ".description"] = "Data that belong to " + trial
        if options.data_type == "ephys":
            start = trial_t[i]
            stop = trial_t[i + 1]
            dict["epochs." + trial + ".start_time"] = start
            dict["epochs." + trial + ".stop_time"] = stop
            tags = []
            if good_trials[i] == 1:
                tags.append("Good trial")
            else:
                tags.append("Non-performing")
            for j in range(len(epoch_tags[trial])):
                tags.append(epoch_tags[trial][j])
            try:
                dict["epochs." + trial + ".tags"] = tags
            except:
                sys.err("   Unable to create dataset 'tag' containing " +
                        str(tags))
            # keep with tradition and create a units field, even if it's empty
            if trial not in epoch_units:
                units = ["NA"]
            else:
                units = epoch_units[trial]
            try:
                dict["epochs." + trial + ".units_present"] = units
            except:
                print "   Unable to create dataset 'units_present' containing ", units

            raw_path = "descrHash/value/%d" % (trial_id[i])
            raw_file = parse_h5_obj(data_h5[raw_path])[0]
            if len(raw_file) == 1:
                raw_file = 'na'
            else:
                raw_file = str(raw_file)

        elif options.data_type == "ophys":
            start = trial_t[i]
            stop = trial_t[i + 1]

            dict["epochs." + trial + ".start_time"] = start
            dict["epochs." + trial + ".stop_time"] = stop

            if trial in epoch_roi_list.keys():
                dict["epochs." + trial + ".ROIs"] = epoch_roi_list[trial]
                dict["epochs." + trial +
                     ".ROI_planes"] = epoch_roi_planes[trial]
            tags = []
            if trial in epoch_trial_types:
                for j in range(len(epoch_trial_types[trial])):
                    tags.append(epoch_trial_types[trial][j])
            dict["epochs." + trial + ".tags"] = tags

    return dict
Example #21
def main(args):

    parser = argparse.ArgumentParser(
        description='Check if the barcodes  are present in the sample')
    parser.add_argument(
        '-i',
        '--infile')  # file containing the sample name and the barcode string
    parser.add_argument('-o', '--outfile',
                        default="out.txt")  # output filename
    parser.add_argument('-l', '--logfile',
                        default="ITS-log-file.txt")  # output filename
    parser.add_argument('-a', '--amplicon', default="")  # 16S, nifH, ITS
    parser.add_argument('-f', '--fasta', action='store_true',
                        default=False)  # files in fasta format, default fastq
    parser.add_argument('-q', '--quality', default=30,
                        type=int)  # Phred quality threshold of seq.
    parser.add_argument(
        '-z', '--trimfastq',
        default='./Trimmed_fastq_files/')  # prefix for trimmed files
    parser.add_argument('-c', '--cutadapt', action='store_true',
                        default=False)  # run cutadapt
    parser.add_argument('-d', '--delimiter',
                        default='\t')  # delimiter for file
    parser.add_argument(
        '-m', '--histogram', action='store_true',
        default=False)  # draw histogram for each adapter using infile

    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit('\nat least one argument required\n')

    infile = args.infile
    outpfile = args.outfile
    filetype = '.fasta' if args.fasta else '.fastq'
    logfile = args.logfile
    run_cutadapt = args.cutadapt
    delim = args.delimiter
    if args.histogram:
        draw_histogram_adpaters_trimed_seqs(infile)
        sys.exit(0)

    global FWDPRIMER, FADAPTER, REVPRIMER, RADAPTER, QUALITY, R_1, R_2, INTERMFASTQ, INTERMINFO, TRIMFASTQF, TRIMFASTQD, \
                REV_COMPLEM_RAD_RP, REV_COMPLEM_FAD_FP, FAD_FP, RAD_RP, REV_COMPLEM_FWDPRIMER, REV_COMPLEM_REVPRIMER
    if args.amplicon == "16S":  # primers and adapters for the 16S data
        FWDPRIMER = 'CCTACGGGNGGCWGCAG'
        FADAPTER = 'TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG'
        REVPRIMER = 'GACTACHVGGGTATCTAATCC'
        RADAPTER = 'GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG'
    elif args.amplicon == "nifH":  # primers and adapters for the nifH data
        FWDPRIMER = 'TGCGAYCCSAARGCBGACTC'
        FADAPTER = 'TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG'
        REVPRIMER = 'ATSGCCATCATYTCRCCGGA'
        RADAPTER = 'GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG'
    elif args.amplicon == "ITS":  # primers and adapters for the ITS data
        FWDPRIMER = 'CTTGGTCATTTAGAGGAAGTAA'
        FADAPTER = 'TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG'
        REVPRIMER = 'GCTGCGTTCTTCATCGATGC'
        RADAPTER = 'GTCTCGTGGGCTCGGAGATGTGTATAAGAGACAG'
    else:
        sys.exit(
            "The code was modified so that you now have to explicitly mention the amplicon (16S, nifH, ITS). Previous code allowed 16S with a boolean (True) -a and without the argument it defaulted to ITS"
        )

    REV_COMPLEM_FWDPRIMER = BioSeq.reverse_complement(
        FWDPRIMER)  # search at end in read 2
    REV_COMPLEM_REVPRIMER = BioSeq.reverse_complement(
        REVPRIMER)  # search at end in read 1

    FAD_FP = FADAPTER + FWDPRIMER  # check at start in read 1
    RAD_RP = RADAPTER + REVPRIMER  # check at start in read 2
    REV_COMPLEM_RAD_RP = BioSeq.reverse_complement(
        RAD_RP)  # search at end in read 1
    REV_COMPLEM_FAD_FP = BioSeq.reverse_complement(
        FAD_FP)  # search at end in read 2

    QUALITY = args.quality
    R_1 = '_L001_R1_001'
    R_2 = '_L001_R2_001'
    #INTERMFASTQ = args.intermedfastq
    #INTERMINFO = args.intermedinfo
    TRIMFASTQD = args.trimfastq  # dir
    TRIMFASTQF = TRIMFASTQD + 'trim_'  # dir + prefix for files

    for dir in [TRIMFASTQD]:
        if not os.path.exists(dir):
            os.makedirs(dir)

    lines = read_file(infile)
    #GRL4463_S49_L001_R1_001 or 1_S72_L001_R1_001
    SAMPLE_BARCODE_DICT = dict()  # this is not a global variable
    sampleid_col = fbarcode_col = rbarcode_col = 0
    for l in lines:
        cont = l.strip().split(delim)
        if '#' in l:
            # identify column number of SampleId and barcodeseq. check if present to avoid ValueError if the target is not found
            if "#SampleID" in cont:
                sampleid_col = cont.index("#SampleID")
            if "FBarcodeSequence" in cont:
                fbarcode_col = cont.index("FBarcodeSequence")
            if "RBarcodeSequence" in cont:
                rbarcode_col = cont.index("RBarcodeSequence")
            continue
        if sampleid_col == 0 and fbarcode_col == 0 and rbarcode_col == 0:
            sys.exit(
                'Could not detect the column labels SampleID, FBarcodeSequence, or RBarcodeSequence'
            )
        sample = cont[sampleid_col] + '_S'
        barcode = cont[fbarcode_col] + '_' + cont[rbarcode_col]
        if sample in SAMPLE_BARCODE_DICT:
            sys.exit(
                '\nduplicate samples/barcodes detected, check sample sheet\n')
        else:
            SAMPLE_BARCODE_DICT[sample] = barcode
        # The nextera 16S metagenomic kit has the following layout:
        #P5 - Index2 - overhang_adapter - fwd_primer - DNA - rev_primer - overhang_adapter - Index1 - P7
    barcode_stats(outpfile, SAMPLE_BARCODE_DICT, delim, filetype, run_cutadapt,
                  logfile)
Example #22
#!/usr/bin/python3

import sys

peaks = dict()

table = open(sys.argv[1])
table.readline()
for i in table:
	f = i.rstrip().split("\t")
	chrx  = f[0] + ":" + f[1]
	start = int(f[2])
	end   = int(f[3])
	if start > end:
		sys.err("Bro, something wrong with the coordinates!" + i)
		exit(0)
	if chrx not in peaks:
		peaks[chrx] = list()
	peaks[chrx].append((start, end))

for chrx in peaks:
	for i in range(len(peaks[chrx]) - 1):
		for j in range(i+1, len(peaks[chrx])):
			distance = abs(peaks[chrx][j][0] - peaks[chrx][i][1])
			print(distance)
Example #23
	nvalues = len(mvs[0])
	print nvalues
	for nv in range(0, nvalues):
		if mvs[0][nv]+count == 0 or mvs[0][nv]+count == ntimes-1:
			#can't do anything for missing values at the very beginning or very end of the timeseries
			#print "can't deal with missing values sequentially in time, setting to zero"
			precipin[mvs[0][nv],mvs[1][nv],mvs[2][nv]] = 0.0
		elif mvs[0][nv] < step+1:
			if (precipin[mvs[0][nv]-1,mvs[1][nv],mvs[2][nv]] > -1 and precipin[mvs[0][nv]+1,mvs[1][nv],mvs[2][nv]] > -1):
				precipin[mvs[0][nv],mvs[1][nv],mvs[2][nv]] = 0.5 * precipin[mvs[0][nv]-1,mvs[1][nv],mvs[2][nv]] + \
					0.5 * precipin[mvs[0][nv]+1,mvs[1][nv],mvs[2][nv]]
			else:
				precipin[mvs[0][nv],mvs[1][nv],mvs[2][nv]] = 0.0
		else:
			#don't do anything for the last timestamp in each segment - it will be included in the next unless it's the
			#last one in which case we've already dealt with it!
			print "not doing anything for time " + str(count + mvs[0][nv])

	count = count + step

	mvs2 = np.where(precipin[0:step+1,:,:] < -1)
	nvalues2 = len(mvs2[0])
	if nvalues2 != 0:
		sys.stderr.write("error with filling missing data; missing data remains\n")

	precipdata.variables['pcp'][nt:upto,:,:] = precipin
	print 'finished filling in missing data'


Example #24
import sys
import json
import yaml

try:
    jsonfile = sys.argv[1]
except IndexError:
    sys.err(f"Usage: {sys.argv[0]} JSONFILE")

with open(jsonfile) as f:
    data = json.load(f)

tickets = data["tickets"]
output = {"labels": {}}
tag_types = [
    ["component", "c:"],
    ["priority", "p:"],
    ["type", "t:"],
    ["owner", "o:"],
    ["version", "v:"],
    ["resolution", "r:"],
]
# Convert space to hyphen and drop punctuation
fixer = str.maketrans(" ", "-", ",;:")
for tag_type, prefix in tag_types:
    tag_list = set(d.get("attributes").get(tag_type) for d in tickets.values())
    output["labels"][tag_type] = {
        _: f"{prefix}{_.translate(fixer)}"
        for _ in tag_list
    }
    print(f"{tag_type}s:", tag_list)