def main():
    cmdline_args = sys.argv[1:]
    if len(cmdline_args) == 0:
        print("usage: python main.py generate|classify")
        exit(-1)
    operation = cmdline_args[0]
    if operation == "generate":
        files = cmdline_args[1:]
        if len(files) > 2 or len(files) == 0:
            print("usage: python main.py generate <training_file_csv> [<test_file_csv>]")
            exit(-1)
        for file in files:
            if not os.path.isfile(file):
                print(file + " not found")
                exit(-1)
        # Derive the feature file name from the input name: foo.csv -> foo_features.csv
        generate_data.main(files[0], os.path.splitext(files[0])[0] + "_features.csv")
        if len(files) == 2:
            generate_data.main(files[1], os.path.splitext(files[1])[0] + "_features.csv",
                               is_training=False)
    elif operation == "classify":
        cmdline_args = cmdline_args[1:]
        if len(cmdline_args) == 0:
            print("Usage: python main.py classify <classifier_module> <training-csv-file> "
                  "[<testing-csv-file> <predictions-output-file>]")
            exit(-1)
        # Module name (without .py); imported dynamically below via __import__.
        classifier_to_use = cmdline_args[0]
        if len(cmdline_args) == 2:
            print("Only training the model")
            should_test = False
            training_file_name = cmdline_args[1]
        elif len(cmdline_args) == 4:
            print("Training the model and generating predictions for test set")
            should_test = True
            training_file_name = cmdline_args[1]
            test_file_name = cmdline_args[2]
            prediction_file_name = cmdline_args[3]
        else:
            print("Usage: python main.py classify <classifier_module> <training-csv-file> "
                  "[<testing-csv-file> <predictions-output-file>]")
            exit(-1)
        if not os.path.isfile(training_file_name):
            print(training_file_name + " not present")
            exit(-1)
        elif should_test and not os.path.isfile(test_file_name):
            print(test_file_name + " not present")
            exit(-1)
        classifier = __import__(classifier_to_use)
        model = classifier.train_model(training_file_name)
        if should_test:
            classifier.generate_predictions(model, test_file_name, prediction_file_name)
    else:
        print("usage: python main.py generate|classify")
        exit(-1)
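# The classify branch above loads the classifier with __import__, so whatever
# module name is passed must be importable and expose train_model() and
# generate_predictions(). A minimal sketch of that implied contract follows,
# with a placeholder model; the project's real classifier modules will differ.
import csv

def train_model(training_file_name):
    # Fit some model from the training CSV; the returned object is opaque
    # to main(), which only hands it back to generate_predictions().
    with open(training_file_name) as f:
        rows = list(csv.reader(f))
    return {"n_training_rows": len(rows)}  # placeholder "model"

def generate_predictions(model, test_file_name, prediction_file_name):
    # Emit one (dummy) prediction per test row.
    with open(test_file_name) as f_in, \
         open(prediction_file_name, "w", newline="") as f_out:
        writer = csv.writer(f_out)
        for _row in csv.reader(f_in):
            writer.writerow([0])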
def load_data(self):
    embedding_dict = embedding.word_to_embedding()
    # Train and dev sets are both built from the "bigdata" folder;
    # the test set comes from "bigtest".
    self.X_train, self.y_train = embedding.generate_embeddings(
        generate_data.main("bigdata"),
        embedding_dict, self.config.embed_size, self.config.threshold)
    self.X_dev, self.y_dev = embedding.generate_embeddings(
        generate_data.main("bigdata"),
        embedding_dict, self.config.embed_size, self.config.threshold)
    self.X_test, self.y_test = embedding.generate_embeddings(
        generate_data.main("bigtest"),
        embedding_dict, self.config.embed_size, self.config.threshold)
    # Keep a shuffled sample of 100 test examples.
    self.X_test, self.y_test = generate_data.shuffle(self.X_test, self.y_test, 100)
def return_stuff(foldername):
    # Map each vocabulary word to a unique integer index.
    embedding_dict = embedding.word_to_embedding()
    new_dict = defaultdict(int)
    counter = 0
    for key in embedding_dict:
        new_dict[key] = counter
        counter += 1
    # Convert each paragraph to a list of word indices plus its label.
    paragraphs = generate_data.main(foldername)
    word_pars = []
    label_pars = []
    for paragraph in paragraphs:
        word_pars.append([new_dict[word] for word in paragraph[0]])
        label_pars.append(paragraph[1])
    #if foldername == "bigtest":
    #    return np.array(word_pars[:100]), np.array(label_pars[:100])
    return np.array(word_pars), np.array(label_pars)
import flask
import hashlib
import json
import logging
import os
import sqlite3
import subprocess
import sys

from werkzeug import debug

# Generate test data when running locally
data_dir = os.path.join(os.path.dirname(__file__), 'data')
if not os.path.exists(data_dir):
    import generate_data
    os.mkdir(data_dir)
    generate_data.main(data_dir, 'dummy-password', 'dummy-proof', 'dummy-plans')

secrets = json.load(open(os.path.join(data_dir, 'secrets.json')))
index_html = open('index.html').read()

app = flask.Flask(__name__)
# Turn on backtraces, but turn off code execution (that'd be an easy level!)
app.config['PROPAGATE_EXCEPTIONS'] = True
app.wsgi_app = debug.DebuggedApplication(app.wsgi_app, evalex=False)
app.logger.addHandler(logging.StreamHandler(sys.stderr))

# use persistent entropy file for secret_key
app.secret_key = open(os.path.join(data_dir, 'entropy.dat')).read()

# Allow setting url_root if needed
try:
    task.upload_artifact('generator', artifact_object='generator.pickle',
                         delete_after_upload=True)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset', type=str, action='store', required=True)
    parser.add_argument('--generator', type=str, action='store', required=True)
    parser.add_argument('--parameters', type=str, action='store', required=False, default="{}")
    parser.add_argument('--final', action='store_true')
    parser.add_argument('--generate', action='store_true')
    args = parser.parse_args()

    extra_args = json.loads(args.parameters)
    tags = ["TrainGenerator"]
    if args.final:
        tags.append("Final")
    task = Task.init(project_name="Synthetic data generators",
                     task_name="train_generator",
                     tags=tags,
                     reuse_last_task_id=not args.final)
    main(task, args.dataset, args.generator, extra_args)
    task.flush(wait_for_uploads=True)
    # Let the server parse our uploads
    sleep(20.0)
    if args.generate:
        generate_data.main(task, task.task_id, {})
import generate_data
import os
import uuid

filename = 'py-script-generated'
REPEAT = 100

for i in range(REPEAT):
    unique_filename = filename + str(uuid.uuid4())
    generate_data.main(unique_filename)
    # Upload the generated JSON to the GCS bucket, silencing gsutil's output
    # (the original `2>&1 > /dev/null` left stderr on the terminal),
    # then delete the local copy.
    os.system('gsutil cp ' + unique_filename + '.json '
              'gs://stephwangstarter-files-source-1574201370 > /dev/null 2>&1')
    os.remove(unique_filename + '.json')
    print("file: " + unique_filename + " iteration: " + str(i + 1))
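# String-built shell commands are easy to get wrong; a sketch of the same
# upload step using subprocess with an argument list instead of os.system.
# Assumes gsutil is installed and authenticated; bucket name as above.
import subprocess

def upload_json(name):
    subprocess.run(
        ['gsutil', 'cp', name + '.json',
         'gs://stephwangstarter-files-source-1574201370'],
        stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        check=True)  # raise if the copy fails instead of failing silently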
import generate_data, double_size, cards

print('Generate Data')
generate_data.main()

print('Create Card Images')
cards.do_the_thing()

print('Resize All Images')
double_size.main()
    os.mkdir(database_path_current)

    # Copy parameter file
    shutil.copyfile("./parameter.py", database_path_current + "/parameter.py")

    # Download images for each class if not already present
    for i_class in range(num_classes):
        if not os.path.exists(database_path + "/" + classes[i_class]):
            download.main(classes[i_class])

    # 4-fold cross validation (1:3 x 4)
    for i_cross_num in range(1, 1 + parameter.cross_num):
        print(i_cross_num)
        database_path_current_cross = database_path_current + "/cross" + str(i_cross_num)
        if not os.path.exists(database_path_current_cross):
            os.mkdir(database_path_current_cross)
        if not os.path.exists(database_path_current_cross + "/animal_.npy"):
            print("===== Generate Data =====")
            generate_data.main(i_cross_num)
        animal_cnn.main(i_cross_num)

    calc_mean.main()
    calc_time = time.time() - start_time
    notify_line.main(calc_time)
    print("=== Main Script Finished ===")
        ET.SubElement(row, self.headers[col_1]).text = str(i)
        ET.SubElement(row, str(aggr) + '_' + self.headers[col_2]).text = str(result[i])
    tree = ET.ElementTree(root)
    tree.write(file)
    # Prepend the XML declaration to the written file.
    with open(file, 'r') as original:
        data = original.read()
    with open(file, 'w') as modified:
        modified.write('<?xml version="1.0" encoding="UTF-8"?>\n' + data)


rows = input('How many data objects should be generated: ')
generate_data.main(int(rows))

logging.basicConfig(level=logging.INFO)
logging.info('Start aggregation. Start time = %s', str(datetime.now()))

print('Available aggregates: count, sum, avg, min, max')
agg = input('Enter an aggregate: ')
print('The dataset contains the following fields:')
for i in enumerate(Data().headers):
    print(i)
print('Enter the number of the field to group by. For example, 6 is type')
c_1 = input('Your choice = ')
print('Enter the number of the field to compute over. For example, 4 is duration')
c_2 = input('Your choice = ')
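# The snippet ends right after reading the two field numbers. A minimal
# sketch of how the chosen aggregate might then be computed, assuming rows
# are indexable sequences; the names AGGREGATES and aggregate are
# hypothetical and the project's actual implementation may differ.
from collections import defaultdict

AGGREGATES = {
    'count': len,
    'sum': sum,
    'avg': lambda values: sum(values) / len(values),
    'min': min,
    'max': max,
}

def aggregate(rows, group_col, value_col, agg_name):
    # Group the numeric values of value_col by the key found in group_col,
    # then reduce each group with the chosen aggregate.
    groups = defaultdict(list)
    for row in rows:
        groups[row[group_col]].append(float(row[value_col]))
    return {key: AGGREGATES[agg_name](vals) for key, vals in groups.items()}

# e.g. result = aggregate(Data().rows, int(c_1), int(c_2), agg)
# (assumes the Data class exposes its rows that way)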
@classmethod
def setUpClass(cls):
    super(ViewsTest, cls).setUpClass()
    # Populate the test database once for the whole test class.
    generate_data.main()
@classmethod
def setUpClass(cls):
    super(QueryTest, cls).setUpClass()
    # Populate the test database once for the whole test class.
    generate_data.main()