def main():
    """Report how the input dataset would be split.

    Reads the command-line arguments through ``Input.getValues``, counts the
    lines of the input file and logs the number of sequences (lines / 2) and
    the number of 100-line files needed. No files are written here.
    """
    for banner in ('------------------',
                   'Split task Starts',
                   '------------------'):
        Log.info(banner)

    # Everything after the script name is handed to the argument parser.
    filePath, fileName = Input.getValues(sys.argv[1:])

    Log.info('Path:')
    Log.info(filePath)
    Log.info('File name:')
    Log.info(fileName)

    try:
        with open(filePath) as handle:
            lines = 0
            for _ in handle:
                lines += 1
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    Log.info('Number of sequences:')
    Log.info(lines / 2)
    Log.info('The dataset is being splittted in files of 50 sequences.')

    Log.info('Total files:')
    filesNumber = math.ceil(lines / 100)
    Log.info(filesNumber)

    # Initialised but not used further in this function.
    filesList, count = [], 0
    Log.info('File names:')

    for banner in ('---------------------',
                   'Split task Completes',
                   '---------------------'):
        Log.info(banner)
def main():
    """Run the MUSCLE step on the file named on the command line.

    Parses the arguments with ``Input.getValues``, counts the lines of the
    input file, derives the output path from the input path and calls
    ``muscle`` with both paths, logging the three values it returns.
    """
    # A Log instance is created with a random 12-letter uppercase tag; the
    # instance itself is not referenced again in this function.
    tag = ''.join(choice(ascii_uppercase) for _ in range(12))
    log = Log(tag)

    for banner in ('------------------------------',
                   'MUSCLE Process Starts',
                   '------------------------------'):
        Log.info(banner)

    filePath, fileName = Input.getValues(sys.argv[1:])
    Log.info('Path:')
    Log.info(filePath)
    Log.info('Filename:')
    Log.info(fileName)

    try:
        with open(filePath) as handle:
            lines = 0
            for _ in handle:
                lines += 1
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    Log.info('Number of sequences:')
    Log.info(lines / 2)

    # NOTE(review): both replacements act on every occurrence in the whole
    # path, not only on the directory name / extension - confirm this is the
    # intended naming before changing it.
    outputFilePath = filePath.replace('in', 'out').replace('fas', 'afas')

    Log.info('Starting MUSCLE..')
    print(filePath, outputFilePath)
    ret, out, err = muscle(filePath, outputFilePath)

    Log.info("return, {}".format(ret))
    Log.info("output, {}".format(out))
    # The third value is always logged at error level.
    Log.error("error, {}".format(err))

    for banner in ('------------------------',
                   'MUSCLE Process Completes',
                   '------------------------'):
        Log.info(banner)
def main():
    """Split the input dataset into chunk files of 100 lines each.

    The input path and file name come from ``Input.getValues`` applied to the
    command-line arguments. The input is read once to count its lines (the
    number of sequences is logged as lines / 2), then read again and copied,
    100 lines at a time, into ``../data/input/in-<n>-<fileName>`` files which
    are opened in append mode.

    I/O failures (``EnvironmentError``) are logged and followed by
    ``Log.exit()``.
    """
    # Lines per output file; the log message below describes this as
    # 50 sequences.
    linesPerFile = 100

    Log.info('------------------')
    Log.info('Split Task Starts')
    Log.info('------------------')

    filePath, fileName = Input.getValues(sys.argv[1:])
    Log.info('Path:')
    Log.info(filePath)
    Log.info('File name:')
    Log.info(fileName)

    try:
        with open(filePath) as f:
            lines = sum(1 for _ in f)
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    Log.info('Number of sequences:')
    Log.info(lines / 2)
    Log.info('The dataset is being split in files of 50 sequences.')
    Log.info('Total files:')
    filesNumber = math.ceil(lines / linesPerFile)
    Log.info(filesNumber)

    Log.info('File names:')
    filesList = ['in-' + str(number) + '-' + fileName
                 for number in range(1, int(filesNumber) + 1)]
    for chunkName in filesList:
        Log.info(chunkName)

    try:
        output = None
        filesCount = 0
        try:
            with open(filePath, "r") as source:
                for lineCount, line in enumerate(source):
                    if lineCount % linesPerFile == 0:
                        # Chunk boundary: close the previous chunk (if any)
                        # before moving on to the next output file.
                        if output is None:
                            Log.info('Start splitting the dataset..')
                        else:
                            output.close()
                        output = open(
                            '../data/input/' + filesList[filesCount], "a")
                        filesCount += 1
                    output.write(line)
        finally:
            # Always close the last chunk so its data is flushed to disk,
            # including when an error interrupts the copy.
            if output is not None:
                output.close()
        Log.info('Finish splitting the dataset..')
    except EnvironmentError as e:
        Log.error('Unkown error:')
        Log.error(e)
        Log.exit()

    Log.info('---------------------')
    Log.info('Split Task Completes')
    Log.info('---------------------')
def getFiles(cls):
    """Return the sorted paths of the ``afas`` files in ``../data/output``.

    Every directory entry whose name matches ``.*afas$`` is returned with the
    ``../data/output/`` prefix. If the directory cannot be listed the error is
    logged and ``Log.exit()`` is called.
    """
    entries = []
    try:
        entries = listdir('../data/output')
    except EnvironmentError as e:
        Log.error('Input file error:')
        Log.error(e)
        Log.exit()

    return sorted(
        '../data/output/' + name
        for name in entries
        if re.search('.*afas$', name)
    )
def getValues(cls, inputArgs):
    """Parse the command-line arguments and return the input file details.

    Recognises ``-f <path>`` and ``--file=<path>``. The option must be given
    exactly once: a repeated option, a missing option or a getopt parsing
    error is logged and followed by ``Log.exit()``.

    Returns a ``(filePath, fileName)`` tuple, where ``fileName`` is the result
    of ``cls.fileInput`` applied to the given path.
    """
    Log.info("input arguments: {}".format(inputArgs))

    try:
        parsed, _ = getopt.getopt(inputArgs, "f:", ["file="])
    except getopt.GetoptError as err:
        Log.error(err)
        Log.exit()

    filePath = fileName = None
    fileSeen = False

    for flag, value in parsed:
        Log.info("processing option: {} with arguments: {}".format(
            flag, value))

        if flag not in ("-f", "--file"):
            continue

        if fileSeen:
            # A second file option is rejected; the first one is kept.
            Log.error(
                "Input Error. Can't pass one argument twice. Exiting the application.."
            )
            Log.exit()
            continue

        fileSeen = True
        filePath = value
        fileName = cls.fileInput(value)

    if not fileSeen:
        Log.error(
            "Input Error. You must specify a valid file. Exiting the application.."
        )
        Log.exit()

    return filePath, fileName