Esempio n. 1
0
 def command(cls, args_list):
     """Run an external command and return its exit status and output.

     Args:
         args_list: The program and its arguments as a sequence of strings.
             The items are joined with spaces for the log line and handed
             unchanged to ``subprocess.Popen``.

     Returns:
         A ``(returncode, stdout, stderr)`` tuple; the two streams are what
         ``communicate()`` yields for the piped stdout and stderr.
     """
     Log.info('Running system command: {0}'.format(' '.join(args_list)))
     process = subprocess.Popen(
         args_list,
         stdout=subprocess.PIPE,
         stderr=subprocess.PIPE,
     )
     # communicate() waits for the child to finish, so returncode is set by
     # the time it is read in the return statement.
     captured_out, captured_err = process.communicate()
     return process.returncode, captured_out, captured_err
Esempio n. 2
0
 def getFiles(cls):
     """Return the sorted paths of the ``afas`` entries in ``../data/output``.

     Every directory entry whose name matches the pattern ``.*afas$`` is
     prefixed with ``../data/output/`` and included in the result.

     Returns:
         A list of path strings in ascending (string) order. If listing the
         directory fails, the error is logged and ``Log.exit()`` is called;
         should that call return, the result is an empty list.
     """
     directory_entries = []
     try:
         directory_entries = listdir('../data/output')
     except EnvironmentError as error:
         Log.error('Input file error:')
         Log.error(error)
         Log.exit()
     return sorted(
         '../data/output/' + entry
         for entry in directory_entries
         if re.search('.*afas$', entry)
     )
Esempio n. 3
0
def main():
    """Split the input file into chunk files of 100 lines each.

    The file path and file name come from the command-line arguments via
    ``Input.getValues``. The file is read twice: once to count its lines
    (two lines are reported as one sequence) and once to copy them, 100 lines
    at a time, into ``../data/input/in-<n>-<fileName>`` with ``n`` starting
    at 1. Chunk files are opened in append mode.

    I/O errors in either pass are logged and followed by ``Log.exit()``.
    """
    Log.info('------------------')
    Log.info('Split Task Starts')
    Log.info('------------------')
    inputArgs = sys.argv
    args = inputArgs[1:]
    filePath, fileName = Input.getValues(args)
    Log.info('Path:')
    Log.info(filePath)
    Log.info('File name:')
    Log.info(fileName)
    try:
        with open(filePath) as f:
            lines = sum(1 for _ in f)
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()
    Log.info('Number of sequences:')
    Log.info(lines / 2)
    Log.info('The dataset is being split in files of 50 sequences.')
    Log.info('Total files:')
    # 100 lines per chunk corresponds to the 50 sequences announced above.
    linesPerFile = 100
    filesNumber = math.ceil(lines / linesPerFile)
    Log.info(filesNumber)
    Log.info('File names:')
    filesList = [
        'in-' + str(number) + '-' + fileName
        for number in range(1, int(filesNumber) + 1)
    ]
    for name in filesList:
        Log.info(name)
    try:
        output = None
        try:
            with open(filePath, "r") as source:
                for lineCount, line in enumerate(source):
                    if lineCount % linesPerFile == 0:
                        # A new chunk starts here: finish the previous chunk
                        # file, or announce the start on the very first line.
                        if output is None:
                            Log.info('Start splitting the dataset..')
                        else:
                            output.close()
                        output = open(
                            '../data/input/'
                            + filesList[lineCount // linesPerFile],
                            "a")
                    output.write(line)
        finally:
            # Always close the chunk that is still open, including the last
            # one and the current one when an error interrupts the loop.
            if output is not None:
                output.close()
        Log.info('Finish splitting the dataset..')
    except EnvironmentError as e:
        Log.error('Unkown error:')
        Log.error(e)
        Log.exit()
    Log.info('---------------------')
    Log.info('Split Task Completes')
    Log.info('---------------------')
Esempio n. 4
0
def main():
    """Log the split plan for the input file without writing any chunk.

    The file path and file name come from the command-line arguments via
    ``Input.getValues``. The file's lines are counted, then the number of
    sequences (lines / 2) and the number of 100-line files are logged.
    A failure to read the file is logged and followed by ``Log.exit()``.
    """
    openingRule = '------------------'
    for message in (openingRule, 'Split task Starts', openingRule):
        Log.info(message)
    filePath, fileName = Input.getValues(sys.argv[1:])
    for message in ('Path:', filePath, 'File name:', fileName):
        Log.info(message)
    try:
        with open(filePath) as handle:
            lines = sum(1 for _ in handle)
    except EnvironmentError as error:
        Log.error('Input file error:')
        Log.error(error)
        Log.exit()
    Log.info('Number of sequences:')
    Log.info(lines / 2)
    Log.info('The dataset is being splittted in files of 50 sequences.')
    Log.info('Total files:')
    filesNumber = math.ceil(lines / 100)
    Log.info(filesNumber)
    # Neither of these two names is read again in this function.
    filesList, count = [], 0
    Log.info('File names:')
    closingRule = '---------------------'
    for message in (closingRule, 'Split task Completes', closingRule):
        Log.info(message)
Esempio n. 5
0
    def getValues(cls, inputArgs):
        """Parse the command-line arguments and return the requested file.

        Recognises ``-f <path>`` / ``--file=<path>``. The option must be given
        exactly once; a parse error, a repeated option or a missing option is
        logged and followed by ``Log.exit()``.

        Args:
            inputArgs: The argument list to parse (without the program name).

        Returns:
            A ``(filePath, fileName)`` tuple: the option's argument and the
            result of ``cls.fileInput`` for it (presumably the bare file
            name; confirm against ``fileInput``).
        """
        Log.info("input arguments: {}".format(inputArgs))
        try:
            opts, args = getopt.getopt(inputArgs, "f:", ["file="])
        except getopt.GetoptError as err:
            Log.error(err)
            Log.exit()

        filePath = fileName = None
        fileSeen = False

        for flag, value in opts:
            Log.info("processing option: {} with arguments: {}".format(
                flag, value))
            if flag not in ("-f", "--file"):
                continue
            if fileSeen:
                Log.error(
                    "Input Error. Can't pass one argument twice. Exiting the application.."
                )
                Log.exit()
                continue
            fileSeen = True
            filePath = value
            fileName = cls.fileInput(value)

        if not fileSeen:
            Log.error(
                "Input Error. You must specify a valid file. Exiting the application.."
            )
            Log.exit()

        return filePath, fileName
def _profileStep(firstInput, secondInput, target):
    """Run one ``muscle`` merge and log its return code, stdout and stderr."""
    print(firstInput, secondInput, target)
    (ret, out, err) = muscle(firstInput, secondInput, target)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))


def main():
    """Merge all afas files into ``../data/final/result.afas`` step by step.

    The files come from ``Filesystem.getFiles()``. The first two are merged
    into ``../data/profile/temp-1.afas``; every following file is merged with
    the previous ``temp-<n>.afas`` into the next one, and the step that
    consumes the last file writes ``../data/final/result.afas``. With exactly
    two files the single merge writes the final result directly.

    Fewer than two files cannot be merged: this is logged as an error and
    followed by ``Log.exit()``.
    """
    Log.info('---------------------')
    Log.info('Profiler Task Starts')
    Log.info('---------------------')
    files = Filesystem.getFiles()
    Log.info("List of afas files:")
    for file in files:
        Log.info(file)
    Log.info('Merging the files with MUSCLE profile option')
    filesNumber = len(files)
    if filesNumber < 2:
        Log.error(
            'Profiling needs at least two afas files, found {}.'.format(
                filesNumber))
        Log.exit()
        # Only reached if Log.exit() returns instead of ending the process.
        return
    finalPath = '../data/final/result.afas'
    for index in range(1, filesNumber):
        isLast = index == filesNumber - 1
        if isLast:
            target = finalPath
        else:
            target = '../data/profile/temp-' + str(index) + '.afas'
        if index == 1:
            Log.info('Profiling the first two files..')
            _profileStep(files[0], files[1], target)
            continue
        if isLast:
            Log.info('Profiling the last file..')
        else:
            Log.info('Profiling a single file..')
        # Merge the next file with the output written by the previous step.
        previous = '../data/profile/temp-' + str(index - 1) + '.afas'
        _profileStep(files[index], previous, target)
    Log.info('Please see the results in folder data/final')
    Log.info('------------------------')
    Log.info('Profiler Task Completes')
    Log.info('------------------------')
Esempio n. 7
0
def _alignedOutputPath(filePath):
    """Return the output path for the alignment of the file at *filePath*.

    Only the parts that name the input are rewritten, so other occurrences of
    ``in`` or ``fas`` in the path stay untouched:

    * a parent directory named ``input`` becomes ``output``;
    * a file-name prefix ``in-`` becomes ``out-``;
    * the extension ``.fas`` becomes ``.afas``; any other name gets ``.afas``
      appended, so the output never coincides with the input.

    Example: ``../data/input/in-1-x.fas`` -> ``../data/output/out-1-x.afas``.
    """
    # Local import so this block does not rely on the file's import section.
    import os

    directory, baseName = os.path.split(filePath)
    parent, leaf = os.path.split(directory)
    if leaf == 'input':
        directory = os.path.join(parent, 'output')
    if baseName.startswith('in-'):
        baseName = 'out-' + baseName[len('in-'):]
    stem, extension = os.path.splitext(baseName)
    if extension == '.fas':
        baseName = stem + '.afas'
    else:
        baseName = baseName + '.afas'
    return os.path.join(directory, baseName)


def main():
    """Run ``muscle`` on the file given on the command line.

    The file path and file name come from the command-line arguments via
    ``Input.getValues``. The file's lines are counted (two lines are reported
    as one sequence), the output path is derived from the input path, and
    ``muscle(filePath, outputFilePath)`` is called; its return code, stdout
    and stderr are logged. A failure to read the input file is logged and
    followed by ``Log.exit()``.
    """

    # The instance is not used below; the call is kept for whatever setup the
    # Log constructor performs with the random 12-letter name.
    log = Log(''.join(choice(ascii_uppercase) for _ in range(12)))
    Log.info('------------------------------')
    Log.info('MUSCLE Process Starts')
    Log.info('------------------------------')
    inputArgs = sys.argv
    args = inputArgs[1:]
    filePath, fileName = Input.getValues(args)
    Log.info('Path:')
    Log.info(filePath)
    Log.info('Filename:')
    Log.info(fileName)
    try:
        with open(filePath) as f:
            lines = sum(1 for _ in f)
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()
    Log.info('Number of sequences:')
    Log.info(lines / 2)
    # A blanket str.replace('in', 'out') / replace('fas', 'afas') would also
    # rewrite unrelated parts of the path, so the mapping is anchored.
    outputFilePath = _alignedOutputPath(filePath)
    Log.info('Starting MUSCLE..')
    print(filePath, outputFilePath)
    (ret, out, err) = muscle(filePath, outputFilePath)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))
    Log.info('------------------------')
    Log.info('MUSCLE Process Completes')
    Log.info('------------------------')