async def main():
    """Entry point: parse CLI arguments, match input files to dataset labels,
    and load each labeled group of files into the DataStore.

    Two labeling modes, selected by the number of capture groups in --label:
      * 0 groups: --label is a literal dataset name; each file argument may
        supply at most one path.
      * 1 group: globbing mode; --label is a regex whose capture group maps
        each input path to its dataset label. Code files (--physl, --python,
        --cpp) are special-cased: a single code file is shared by all labels.

    Raises:
        Exception: on ambiguous/missing arguments, unmatched paths, duplicate
            matches, or >1 capture group in --label. Any error while ingesting
            a dataset purges that dataset's partial data and re-raises.
    """
    args = vars(parser.parse_args())
    if 'folder' in args and args['folder'] is not None:
        # Normalize the folder prefix: no leading/trailing slashes or spaces
        args['folder'] = args['folder'].strip('/ ')
    db = DataStore(args['dbDir'], args['debug'])
    await db.load()

    # The eight per-file CLI arguments we accept (each is a list of paths)
    fileArgs = ['input', 'tree', 'performance', 'graph', 'otf2',
                'physl', 'python', 'cpp']

    inputs = {}
    labelRegex = re.compile(args['label'])
    if labelRegex.groups == 0:
        # We're in normal mode; one path per argument
        inputs[args['label']] = {}
        for arg in fileArgs:
            if len(args[arg]) == 1:
                inputs[args['label']][arg] = args[arg][0]
            elif len(args[arg]) > 1:
                raise Exception(
                    'To use glob patterns, please provide a regular expression with one capture group as a --label argument')
        if not inputs[args['label']]:
            raise Exception(
                'At least one of: --input, --tree, --performance, --graph, --otf2, --physl, --python, and/or --cpp is required')
    elif labelRegex.groups == 1:
        # We're in globbing mode; we can expect many files per argument, and
        # --label should be a regular expression that matches input files to
        # their label. The only (possible) exception are code files: if only
        # one is provided, use it for all labels (otherwise, expect it to match
        # the regular expression as well)
        singlePhysl = args['physl'][0] if len(args['physl']) == 1 else None
        singlePython = args['python'][0] if len(args['python']) == 1 else None
        singleCpp = args['cpp'][0] if len(args['cpp']) == 1 else None
        for arg in fileArgs:
            # Shared single code files are assigned to every label afterward
            if arg == 'physl' and singlePhysl is not None:
                continue
            if arg == 'python' and singlePython is not None:
                continue
            if arg == 'cpp' and singleCpp is not None:
                continue
            for path in args[arg]:
                pathMatch = labelRegex.match(path)
                if pathMatch is None:
                    raise Exception(
                        '--label pattern could not identify a label for file: %s' % path)
                # Labels may not contain slashes (slash separates folders)
                label = pathMatch[1].replace('/', '')
                inputs[label] = inputs.get(label, {})
                if arg in inputs[label]:
                    raise Exception(
                        '--label pattern found duplicate matches for --%s:\n%s\n%s' % (arg, inputs[label][arg], path))
                inputs[label][arg] = path
        # Attach any shared single code files to every discovered label
        for label in inputs:
            if singlePhysl is not None:
                inputs[label]['physl'] = singlePhysl
            if singlePython is not None:
                inputs[label]['python'] = singlePython
            if singleCpp is not None:
                inputs[label]['cpp'] = singleCpp
    else:
        raise Exception('Too many capturing groups in the --label argument')

    for label, paths in inputs.items():
        # --input (phylanx stdout) already embeds tree/performance/graph data,
        # so mixing it with the standalone flags would double-ingest
        if 'input' in paths and ('tree' in paths or 'performance' in paths or 'graph' in paths):
            raise Exception(
                'Don\'t use --input with --tree, --performance, or --graph for the same --label: %s' % label)
        # Initialize the dataset BEFORE the try block: if creation itself
        # fails there is nothing to purge, and the except handler below can
        # always rely on datasetId being bound (previously a failure here
        # would raise a masking NameError inside the handler)
        datasetId = db.createDataset()['info']['datasetId']
        try:
            # Prefix the label with the folder if one was specified
            if 'folder' in args and args['folder'] is not None:
                label = args['folder'] + '/' + label
            await logToConsole('#################' + '#' * len(label))
            await logToConsole('Adding data for: %s (%s)' % (datasetId, label))
            # Assign its name
            db.rename(datasetId, label)
            # Assign any tags
            if args['tags'] is not None:
                tags = {t: True for t in args['tags'].split(',')}
                db.addTags(datasetId, tags)
            # Handle performance files
            if 'performance' in paths:
                with open(paths['performance'], 'r') as file:
                    await db.processCsvFile(datasetId, file)
            # Handle tree files:
            if 'tree' in paths:
                with open(paths['tree'], 'r') as file:
                    await db.processNewickFile(datasetId, file)
            # Handle graph files:
            if 'graph' in paths:
                with open(paths['graph'], 'r') as file:
                    await db.processDotFile(datasetId, file)
            # Handle stdout from phylanx
            if 'input' in paths:
                with open(paths['input'], 'r') as file:
                    await db.processPhylanxLogFile(datasetId, file)
            # Handle code files
            for codeType in ['physl', 'python', 'cpp']:
                if codeType in paths:
                    with open(paths[codeType], 'r') as file:
                        await db.processCodeFile(datasetId, file, codeType)
            # Handle otf2
            if 'otf2' in paths:
                db.addSourceFile(datasetId, paths['otf2'], 'otf2')
                await db.processOtf2(datasetId, FakeFile(paths['otf2']))
            # Save all the data
            await db.save(datasetId)
        except:  # pylint: disable=W0702
            # Deliberately catch everything (including KeyboardInterrupt) so a
            # half-ingested dataset is removed; the original error is re-raised
            await logToConsole(
                'Error encountered; purging corrupted data for: %s' % datasetId)
            del db[datasetId]
            raise