def createCollection(articlesPath=ARTICLES_DIR_NAME):
    if not os.path.exists(articlesPath):
        print('ERROR: unable to find the articles')
        return -1
    normalizedArrayOfDocRes, tfCounter = morph.normalizeArrayOfDoc(
        extractText(articlesPath).split(SPLIT_SENTENCES))
    utils.writeToFile(COLLECTIONS_FILE_NAME, normalizedArrayOfDocRes)
    utils.writeToFile(TF_FILE_NAME, tfCounter)
    return 0

def main(argv):
    team = argv[0] if argv[0] != 'overall' else {'$exists': True}
    teamName = argv[0] if argv[0] != 'overall' else 'overall'
    print('Connecting to Mongo')
    client = pymongo.MongoClient(os.getenv('MONGO_URI'),
                                 ssl_cert_reqs=ssl.CERT_NONE)
    collection = client.epilog.data
    print('fetching metadata')
    doc_count = collection.estimated_document_count()
    print('loading all data from database')
    printProgressBar(0, doc_count, prefix='Progress:', suffix='Complete',
                     length=50)
    data = defaultdict(def_value)
    index = 0
    for doc in collection.find({
            'experimentLabel': team,
            'x': {'$exists': True},
            'y': {'$exists': True},
            'z': {'$exists': True}
    }).sort('time', pymongo.ASCENDING):
        printProgressBar(index, doc_count, prefix='Progress:',
                         suffix='Complete', length=50)
        index = index + 1
        x = int(doc['x'])
        y = int(doc['z'])  # top-down map: the z coordinate is used as the second key
        data[x, y] += 1
    data = sorted(data.items(), key=lambda item: item[0])
    print('')
    print('Writing data to file')
    writeToFile(data, teamName, 'csv')
    return 0

def ast(root: State) -> None:
    start_time = time()
    path_to_goal: Deque[str] = deque()
    cost_of_path: int = 0
    nodes_expanded: int = 0
    search_depth: int = 0
    max_search_depth: int = 0
    running_time: float = 0
    max_ram_usage: float = 0
    fringe: List[State] = [root]
    heapify(fringe)
    visited: List[State] = []
    while len(fringe) > 0:
        currentState: State = heappop(fringe)
        visited.append(currentState)
        if currentState.isGoal():
            end_time = time()
            # Walk back up the parent chain to rebuild the move sequence.
            while currentState.direction is not None:
                path_to_goal.appendleft(currentState.direction)
                currentState = currentState.parent
            cost_of_path = len(path_to_goal)
            path_to_goal = str(list(path_to_goal))
            nodes_expanded = len(visited) - 1
            for state in fringe:
                if state.depth > search_depth:
                    search_depth = state.depth
            max_search_depth = search_depth
            for state in visited:
                if state.depth > max_search_depth:
                    max_search_depth = state.depth
            running_time = end_time - start_time
            max_ram_usage = getrusage(RUSAGE_SELF).ru_maxrss / 1024
            writeToFile(path_to_goal, cost_of_path, nodes_expanded,
                        search_depth, max_search_depth, running_time,
                        max_ram_usage)
            return
        for childState in currentState.getChildren():
            if (childState is not None) and (childState not in visited):
                if childState not in fringe:
                    heappush(fringe, childState)
                else:
                    # Decrease-key: replace the queued state in place if the
                    # new path to it is cheaper, then restore the heap invariant.
                    i = fringe.index(childState)
                    if childState < fringe[i]:
                        fringe[i] = childState
                        heapify(fringe)

def main():
    '''Run orangfuzz and randomly generate lines for the desired device.'''
    args = parseArgs()
    if args.seed is None:
        args.seed = int(math.floor(random.random() * math.pow(2, 28)))
    rndObj = random.Random(args.seed)
    orangDevice = Unagi()
    allLines = []
    allLines = prepopulateStart(orangDevice, rndObj, allLines)
    allLines = generateLines(args, orangDevice, rndObj, allLines)
    writeToFile(args, allLines)

def plot_statistics(statistics_dicts, out_filename):
    tex_str = ''
    offset_strs = list(statistics_dicts.keys())
    offset_strs.sort()
    for offset_str in offset_strs:
        statistics_dict = statistics_dicts[offset_str]
        keys = statistics_dict.keys()
        # All outcomes except locally converged (2) and early stopped (5)
        # count as possibly crossed.
        lines = ['({}, {})'.format(
                     n_hidden,
                     1.0 - ((statistics_dict[n_hidden][2]
                             + statistics_dict[n_hidden][5])
                            / np.sum(statistics_dict[n_hidden])))
                 for n_hidden in np.sort(np.array(list(keys)))]
        tex_str += ('\\addplot coordinates {\n' + '\n'.join(lines) + '};\n'
                    + '\\addlegendentry{$\\Delta = ' + offset_str + '$}\n')
    print('LaTeX code excerpt:')
    print(tex_str)
    tex_str = (utils.readFromFile('tex_head.txt') + tex_str
               + utils.readFromFile('tex_tail.txt'))
    utils.writeToFile(out_filename, tex_str)

def baselinePerformance(labeled_data, k):
    execution_times = []

    # CUSTOM K-MEANS CLUSTERING ALGORITHM
    start_time = time.time()
    custom_clusters = clustering.k_means_clustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    # SKLEARN K-MEANS CLUSTERING ALGORITHM
    start_time = time.time()
    sklearn_clusters = clustering.sklearnKMeansClustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    # SKLEARN AGGLOMERATIVE CLUSTERING ALGORITHM
    start_time = time.time()
    agglomerative_clusters = clustering.sklearnAgglomerativeClustering(labeled_data, k)
    execution_times.append(time.time() - start_time)

    utils.writeToFile(config.CUSTOM_KMEANS_OUTPUT, custom_clusters)
    utils.writeToFile(config.SKLEARN_KMEANS_OUTPUT, sklearn_clusters)
    utils.writeToFile(config.AGGLOMERATIVE_KMEANS_OUTPUT, agglomerative_clusters)

    if config.DEBUG:
        print("custom_clusters:\n", custom_clusters)
        print("sklearn_clusters:\n", sklearn_clusters)
        print("agglomerative_clusters:\n", agglomerative_clusters)

    # execution_times = [time1, time2, time3], where time1 = custom k-means,
    # time2 = sklearn k-means, time3 = sklearn agglomerative
    return [custom_clusters, sklearn_clusters, agglomerative_clusters], execution_times

def main():
    execution_times = []
    file_names = utils.loadFileNames()
    mfccs = loadMFCCSValues(file_names)
    labeled_data = utils.createLabeledData(file_names, mfccs)

    # Run the sklearn DBSCAN algorithm with default parameters; since
    # everything is considered noise, no k value is taken from this run.
    dbscan_clusters = clustering.sklearnDBSCAN(labeled_data, 0.5)
    utils.writeToFile(config.DBSCAN_KMEANS_OUTPUT_DEFAULT_PARAMETERS, dbscan_clusters)

    # Run the sklearn DBSCAN algorithm with EPS = 25, write the result to
    # file and derive the k value from it.
    start_time = time.time()
    dbscan_clusters = clustering.sklearnDBSCAN(labeled_data, config.DBSCAN_EPS)
    execution_times.append(time.time() - start_time)
    utils.writeToFile(config.DBSCAN_KMEANS_OUTPUT_EPS, dbscan_clusters)
    k = len(dbscan_clusters)

    results = baselinePerformance(labeled_data, k)
    execution_times += results[1]
    execution_times = ["%.4f" % member for member in execution_times]

    custom_clusters = sorted(results[0][0])
    sklearn_clusters = sorted(results[0][1])
    agglomerative_clusters = sorted(results[0][2])
    dbscan_clusters = sorted(dbscan_clusters)

    if config.VERBOSE:
        print("dbscan_clusters:\n", dbscan_clusters)
        print("custom_clusters:\n", custom_clusters)
        print("sklearn_clusters:\n", sklearn_clusters)
        print("agglomerative_clusters:\n", agglomerative_clusters)
        print("execution_times:\n", execution_times)

    goodPerformance(file_names, dbscan_clusters, custom_clusters,
                    sklearn_clusters, agglomerative_clusters)

def main(argv):
    team = argv[0]
    print('Connecting to Mongo')
    client = pymongo.MongoClient(os.getenv('MONGO_URI'),
                                 ssl_cert_reqs=ssl.CERT_NONE)
    collection = client.epilog.data
    print('fetching metadata')
    mongo_filter = {'experimentLabel': team, 'event': 'PlayerLocationEvent'}
    # mongo_filter = {'event': 'PlayerLocationEvent'}

    # Earliest matching event: the offset reference point, or the absolute minimum time.
    for doc in collection.find(mongo_filter).sort('time', pymongo.ASCENDING).limit(1):
        _time = math.floor(doc['time'] / 1000)
        if time_offset:
            _time_offset = _time
            min_time = 0
        else:
            min_time = _time

    # Latest matching event, shifted by the offset when one is used.
    for doc in collection.find(mongo_filter).sort('time', pymongo.DESCENDING).limit(1):
        if time_offset:
            max_time = math.floor(doc['time'] / 1000) - _time_offset
        else:
            max_time = math.floor(doc['time'] / 1000)

    doc_count = collection.estimated_document_count()
    print('loading all data from database')
    # printProgressBar(0, doc_count, prefix='Progress:',
    #                  suffix='Complete', length=50)
    data = dict()
    data['time'] = {'min': min_time, 'max': max_time}
    data['timeline'] = dict()
    index = 0
    for doc in collection.find(mongo_filter).sort('time', pymongo.ASCENDING):
        printProgressBar(index, doc_count, prefix='Progress:',
                         suffix='Complete', length=50)
        index = index + 1
        if doc['event'] != "PlayerLocationEvent":
            continue
        doc['_id'] = str(doc['_id'])
        if time_offset:
            time = str(math.floor(doc['time'] / 1000) - _time_offset)
            doc['time'] = doc['time'] - (_time_offset * 1000)
        else:
            time = str(math.floor(doc['time'] / 1000))
        if time in data['timeline']:
            data['timeline'][time].append(doc)
        else:
            data['timeline'][time] = [doc]
    print('')
    print('Writing data to file')
    writeToFile(data, team, 'json')
    return 0

def main():
    global args, max_length
    args = parser.parse_args()
    if args.eval:
        if not os.path.exists(args.output_dir):
            print("Output directory does not exist")
            exit(0)
        try:
            model = EncoderDecoder().load(args.output_dir)
            print("Model loaded successfully")
        except Exception:
            print("The trained model could not be loaded...")
            exit()
        test_pairs = readFile(args.test_file)
        outputs = model.evaluatePairs(test_pairs, rand=False, char=args.char)
        writeToFile(outputs, os.path.join(args.output_dir, "output.pkl"))
        reference = []
        hypothesis = []
        for (hyp, ref) in outputs:
            if args.char or args.char_bleu:
                reference.append([list(ref)])
                hypothesis.append(list(hyp))
            else:
                reference.append([ref.split(" ")])
                hypothesis.append(hyp.split(" "))
        bleu_score = compute_bleu(reference, hypothesis)
        print("Bleu Score: " + str(bleu_score))
        print(model.evaluateAndShowAttention(
            "L'anglais n'est pas facile pour nous.", char=args.char))
        print(model.evaluateAndShowAttention(
            "J'ai dit que l'anglais est facile.", char=args.char))
        print(model.evaluateAndShowAttention(
            "Je n'ai pas dit que l'anglais est une langue facile.",
            char=args.char))
        print(model.evaluateAndShowAttention(
            "Je fais un blocage sur l'anglais.", char=args.char))
    else:
        input_lang, output_lang, pairs = prepareData(args.train_file)
        print(random.choice(pairs))
        if args.char:
            model = EncoderDecoder(args.hidden_size, input_lang.n_chars,
                                   output_lang.n_chars, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, False, 1)
        else:
            model = EncoderDecoder(args.hidden_size, input_lang.n_words,
                                   output_lang.n_words, args.drop, args.tfr,
                                   args.max_length, args.lr, args.simple,
                                   args.bidirectional, args.dot, args.multi,
                                   args.num_layers)
        model.trainIters(pairs, input_lang, output_lang, args.n_iters,
                         print_every=args.print_every,
                         plot_every=args.plot_every, char=args.char)
        model.save(args.output_dir)
        model.evaluatePairs(pairs, char=args.char)