def getValues(cls, inputArgs):
    """Parse the command-line arguments and return the requested input file.

    Accepts a single ``-f <path>`` / ``--file=<path>`` option. The option
    must be given exactly once; otherwise an error is logged and
    ``Log.exit()`` is called.

    Args:
        inputArgs: argument list to parse (program name already removed).

    Returns:
        A ``(filePath, fileName)`` tuple, where ``filePath`` is the raw
        option value and ``fileName`` is whatever ``cls.fileInput`` returns
        for it.
    """
    Log.info("input arguments: {}".format(inputArgs))

    shortSpec = "f:"
    longSpec = ["file="]
    try:
        opts, _ = getopt.getopt(inputArgs, shortSpec, longSpec)
    except getopt.GetoptError as err:
        Log.error(err)
        Log.exit()

    filePath = None
    fileName = None
    fileFlag = False
    for opt, arg in opts:
        Log.info("processing option: {} with arguments: {}".format(opt, arg))
        if opt not in ("-f", "--file"):
            continue
        if not fileFlag:
            # First occurrence of the file option: remember it.
            fileFlag = True
            filePath = arg
            fileName = cls.fileInput(arg)
        else:
            Log.error(
                "Input Error. Can't pass one argument twice. Exiting the application.."
            )
            Log.exit()

    if not fileFlag:
        Log.error(
            "Input Error. You must specify a valid file. Exiting the application.."
        )
        Log.exit()

    return filePath, fileName
def command(cls, args_list):
    """Run an external command and capture its result.

    Args:
        args_list: the command and its arguments as a list of strings.

    Returns:
        A ``(returncode, stdout, stderr)`` tuple; stdout and stderr are the
        captured pipe contents as returned by ``Popen.communicate()``.
    """
    commandLine = ' '.join(args_list)
    Log.info('Running system command: {0}'.format(commandLine))

    process = subprocess.Popen(
        args_list,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # communicate() waits for the process to end, so returncode is set after it.
    capturedOut, capturedErr = process.communicate()
    return process.returncode, capturedOut, capturedErr
def main():
    """Split the input dataset into chunk files of 50 sequences each.

    The input path comes from the command line (via ``Input.getValues``).
    The file is counted line by line; the code treats every sequence as
    two lines, so each chunk holds 100 lines. Chunks are appended to
    ``../data/input/in-<n>-<fileName>``.

    I/O failures are logged and end the run through ``Log.exit()``.
    """
    Log.info('------------------')
    Log.info('Split Task Starts')
    Log.info('------------------')

    args = sys.argv[1:]
    filePath, fileName = Input.getValues(args)
    Log.info('Path:')
    Log.info(filePath)
    Log.info('File name:')
    Log.info(fileName)

    try:
        with open(filePath) as f:
            lines = sum(1 for _ in f)
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    # 50 sequences per chunk, two lines per sequence.
    linesPerFile = 100

    Log.info('Number of sequences:')
    Log.info(lines / 2)
    Log.info('The dataset is being split in files of 50 sequences.')
    Log.info('Total files:')
    # Integer ceiling division: always an int, and it never drops the last
    # partial chunk regardless of how "/" divides integers.
    filesNumber = (lines + linesPerFile - 1) // linesPerFile
    Log.info(filesNumber)

    Log.info('File names:')
    filesList = [
        'in-' + str(number) + '-' + fileName
        for number in range(1, filesNumber + 1)
    ]
    for chunkName in filesList:
        Log.info(chunkName)

    try:
        output = None
        try:
            with open(filePath, "r") as source:
                for lineCount, line in enumerate(source):
                    if lineCount % linesPerFile == 0:
                        if output is None:
                            Log.info('Start splitting the dataset..')
                        else:
                            output.close()
                        chunkIndex = lineCount // linesPerFile
                        # Append mode is kept from the original: existing
                        # chunk files are extended, not truncated.
                        output = open('../data/input/' + filesList[chunkIndex], "a")
                    output.write(line)
        finally:
            # Close the last chunk, including when an error interrupts the
            # loop. Closing an already-closed file is a no-op.
            if output is not None:
                output.close()
        Log.info('Finish splitting the dataset..')
    except EnvironmentError as e:
        Log.error('Unkown error:')
        Log.error(e)
        Log.exit()

    Log.info('---------------------')
    Log.info('Split Task Completes')
    Log.info('---------------------')
def main():
    """Report how the input dataset would be split, without writing files.

    Reads the input path from the command line, counts the lines of the
    file and logs the number of sequences (two lines each) and the number
    of 100-line chunk files. No chunk files are produced by this function.
    """
    for banner in ('------------------', 'Split task Starts', '------------------'):
        Log.info(banner)

    args = sys.argv[1:]
    filePath, fileName = Input.getValues(args)
    for entry in ('Path:', filePath, 'File name:', fileName):
        Log.info(entry)

    try:
        with open(filePath) as handle:
            lines = 0
            for _ in handle:
                lines += 1
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    Log.info('Number of sequences:')
    Log.info(lines/2)
    Log.info('The dataset is being splittted in files of 50 sequences.')
    Log.info('Total files:')
    filesNumber = math.ceil(lines/100)
    Log.info(filesNumber)

    # Placeholders kept from the original; nothing fills or reads them here.
    filesList = []
    count = 0
    Log.info('File names:')

    for banner in ('---------------------', 'Split task Completes', '---------------------'):
        Log.info(banner)
def _runProfileStep(firstInput, secondInput, outputPath):
    """Run one MUSCLE profile merge and log its return code, stdout and stderr."""
    print(firstInput, secondInput, outputPath)
    (ret, out, err) = muscle(firstInput, secondInput, outputPath)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    # stderr is logged at error level unconditionally, as the original did.
    Log.error("error, {}".format(err))


def main():
    """Merge all aligned files into one result with chained profile steps.

    The files returned by ``Filesystem.getFiles()`` are merged pairwise:
    the first two go into ``../data/profile/temp-1.afas``, each following
    file is merged with the previous temp file into the next one, and the
    last file is merged into ``../data/final/result.afas``.

    With exactly two files the single merge is written straight to the
    final result. With fewer than two files there is nothing to merge; an
    error is logged and the function returns without producing a result.
    """
    Log.info('---------------------')
    Log.info('Profiler Task Starts')
    Log.info('---------------------')

    files = Filesystem.getFiles()
    Log.info("List of afas files:")
    for name in files:
        Log.info(name)

    Log.info('Merging the files with MUSCLE profile option')
    filesNumber = len(files)
    finalPath = '../data/final/result.afas'

    def tempPath(step):
        return '../data/profile/temp-' + str(step) + '.afas'

    if filesNumber < 2:
        # The original raised IndexError for one file and silently did
        # nothing for zero; report the problem instead.
        Log.error('Input Error. At least two afas files are required for profiling.')
        return

    Log.info('Profiling the first two files..')
    if filesNumber == 2:
        # No intermediate profile is needed; the original tried to read a
        # non-existent temp-0.afas in this case.
        _runProfileStep(files[0], files[1], finalPath)
    else:
        _runProfileStep(files[0], files[1], tempPath(1))

        # Middle files: merge file k with temp-(k-1) into temp-k.
        for index in range(2, filesNumber - 1):
            Log.info('Profiling a single file..')
            _runProfileStep(files[index], tempPath(index - 1), tempPath(index))

        Log.info('Profiling the last file..')
        _runProfileStep(files[filesNumber - 1], tempPath(filesNumber - 2), finalPath)

    Log.info('Please see the results in folder data/final')
    Log.info('------------------------')
    Log.info('Profiler Task Completes')
    Log.info('------------------------')
def main():
    """Run MUSCLE on the input file given on the command line.

    Creates a ``Log`` instance with a random 12-letter uppercase name,
    logs the input path and its sequence count (two lines per sequence),
    derives the output path from the input path and calls ``muscle``.
    """
    randomTag = ''.join(choice(ascii_uppercase) for _ in range(12))
    log = Log(randomTag)

    for banner in ('------------------------------',
                   'MUSCLE Process Starts',
                   '------------------------------'):
        Log.info(banner)

    args = sys.argv[1:]
    filePath, fileName = Input.getValues(args)
    for entry in ('Path:', filePath, 'Filename:', fileName):
        Log.info(entry)

    try:
        with open(filePath) as handle:
            lines = 0
            for _ in handle:
                lines += 1
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    Log.info('Number of sequences:')
    Log.info(lines / 2)

    # NOTE(review): both replacements hit every occurrence in the path
    # (e.g. "input" -> "output", "in-" -> "out-"); presumably intended, but
    # any other "in"/"fas" substring in the path is rewritten too - confirm.
    outputFilePath = filePath.replace('in', 'out').replace('fas', 'afas')

    Log.info('Starting MUSCLE..')
    print(filePath, outputFilePath)
    (ret, out, err) = muscle(filePath, outputFilePath)
    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    Log.error("error, {}".format(err))

    for banner in ('------------------------',
                   'MUSCLE Process Completes',
                   '------------------------'):
        Log.info(banner)