예제 #1
0
def main():
    """Dispatch the ``generate`` / ``classify`` sub-commands given in ``sys.argv``.

    ``generate <training_file_csv> [<test_file_csv>]``
        Calls ``generate_data.main`` for each input file, writing to
        ``<input stem>_features.csv``.
    ``classify <classifier> <training-csv> [<testing-csv> <predictions-out>]``
        Imports the named classifier module, trains it and, when the two
        optional arguments are present, generates predictions.

    Any malformed command line or missing input file prints a message and
    terminates the process with exit status -1 (raises ``SystemExit``).
    """
    cmdline_args = sys.argv[1:]
    if not cmdline_args:
        print("usage python main.py generate|classify")
        # sys.exit instead of the bare exit(): the latter is injected by the
        # ``site`` module and is absent under ``python -S`` or frozen builds.
        sys.exit(-1)

    operation, operands = cmdline_args[0], cmdline_args[1:]
    if operation == "generate":
        _run_generate(operands)
    elif operation == "classify":
        _run_classify(operands)
    else:
        print("usage python main.py generate|classify")
        sys.exit(-1)


def _run_generate(files):
    """Validate one or two CSV paths and produce their ``*_features.csv`` files."""
    if not 1 <= len(files) <= 2:
        print(
            "usage python main.py generate <training_file_csv> [<test_file_csv>]"
        )
        sys.exit(-1)
    for path in files:
        if not os.path.isfile(path):
            print(path + " not found")
            sys.exit(-1)

    # The first file goes through generate_data with its default settings.
    generate_data.main(files[0],
                       os.path.splitext(files[0])[0] + "_features.csv")
    if len(files) == 2:
        # The optional second file is explicitly flagged as non-training data.
        generate_data.main(files[1],
                           os.path.splitext(files[1])[0] + "_features.csv",
                           is_training=False)


def _run_classify(operands):
    """Train the named classifier module and optionally write test predictions."""
    if len(operands) == 2:
        print("Only training the model")
        classifier_to_use, training_file_name = operands
        test_file_name = prediction_file_name = None
    elif len(operands) == 4:
        print("Training the model and generating predictions for test set")
        (classifier_to_use, training_file_name,
         test_file_name, prediction_file_name) = operands
    else:
        # Also reached when no operand is given at all; reading the classifier
        # name before this length check used to raise IndexError in that case.
        print(
            "Usage: python classify <classifier_file.py> <training-csv-file> [<testing-csv-file> <predictions-output-file>]"
        )
        sys.exit(-1)
    should_test = test_file_name is not None

    if not os.path.isfile(training_file_name):
        print(training_file_name + " not present")
        sys.exit(-1)
    elif should_test and not os.path.isfile(test_file_name):
        print(test_file_name + " not present")
        sys.exit(-1)

    # The classifier is imported by module name; it must provide train_model()
    # and, when predictions are requested, generate_predictions().
    classifier = __import__(classifier_to_use)
    model = classifier.train_model(training_file_name)
    if should_test:
        classifier.generate_predictions(model, test_file_name,
                                        prediction_file_name)
예제 #2
0
    def load_data(self):
        """Populate the train, dev and test splits with embedded data.

        Each split is produced by ``embedding.generate_embeddings`` from the
        output of ``generate_data.main`` for a named source, using the
        embedding size and threshold found on ``self.config``.  The test
        split is afterwards passed through ``generate_data.shuffle`` with
        the argument 100.

        NOTE(review): the dev split is built from the same "bigdata" source
        as the training split -- confirm this is intentional.
        """
        self.X_train = []
        self.y_train = []

        embedding_dict = embedding.word_to_embedding()

        def embed(source_name):
            # One (X, y) pair for the named source, embedded with the shared
            # lookup table and the configured size / threshold.
            return embedding.generate_embeddings(
                generate_data.main(source_name),
                embedding_dict,
                self.config.embed_size,
                self.config.threshold,
            )

        self.X_train, self.y_train = embed("bigdata")
        self.X_dev, self.y_dev = embed("bigdata")
        self.X_test, self.y_test = embed("bigtest")
        self.X_test, self.y_test = generate_data.shuffle(
            self.X_test, self.y_test, 100)
예제 #3
0
def return_stuff(foldername):
    """Turn the paragraphs of *foldername* into word-index and label arrays.

    Every key of the embedding dictionary is numbered by its position in
    iteration order.  Each paragraph returned by ``generate_data.main`` is
    read as a pair: element 0 is iterated word by word and mapped to those
    numbers, element 1 is kept as the label.

    Returns a tuple ``(np.array(word_index_lists), np.array(labels))``.
    """
    embedding_dict = embedding.word_to_embedding()
    position_of = {key: position
                   for position, key in enumerate(embedding_dict)}

    word_pars = []
    label_pars = []
    for paragraph in generate_data.main(foldername):
        # Words missing from the embedding dictionary map to 0, which is
        # also the number assigned to the first dictionary key.
        word_pars.append([position_of.get(word, 0) for word in paragraph[0]])
        label_pars.append(paragraph[1])

    # A former cap of the "bigtest" folder to its first 100 paragraphs is
    # disabled; the full data is always returned.
    return np.array(word_pars), np.array(label_pars)
예제 #4
0
import flask
import hashlib
import json
import logging
import os
import sqlite3
import subprocess
import sys
from werkzeug import debug

# Generate test data when running locally
data_dir = os.path.join(os.path.dirname(__file__), 'data')
if not os.path.exists(data_dir):
    import generate_data
    os.mkdir(data_dir)
    generate_data.main(data_dir, 'dummy-password', 'dummy-proof',
                       'dummy-plans')

# Load the start-up files through context managers so every handle is closed
# as soon as its content has been read, instead of being left to the garbage
# collector.
with open(os.path.join(data_dir, 'secrets.json')) as _fh:
    secrets = json.load(_fh)
# index.html is resolved against the current working directory.
with open('index.html') as _fh:
    index_html = _fh.read()
app = flask.Flask(__name__)

# Turn on backtraces, but turn off code execution (that'd be an easy level!)
app.config['PROPAGATE_EXCEPTIONS'] = True
app.wsgi_app = debug.DebuggedApplication(app.wsgi_app, evalex=False)

app.logger.addHandler(logging.StreamHandler(sys.stderr))
# use persistent entropy file for secret_key
with open(os.path.join(data_dir, 'entropy.dat')) as _fh:
    app.secret_key = _fh.read()

# Allow setting url_root if needed
try:
예제 #5
0
    task.upload_artifact('generator', artifact_object='generator.pickle', delete_after_upload=True)


if __name__ == "__main__":
    # Command line: two mandatory string options, one JSON-encoded options
    # string (an empty object by default) and two boolean switches.
    parser = argparse.ArgumentParser()
    for mandatory in ('--dataset', '--generator'):
        parser.add_argument(mandatory, type=str, action='store', required=True)
    parser.add_argument('--parameters', type=str, action='store',
                        required=False, default="{}")
    for switch in ('--final', '--generate'):
        parser.add_argument(switch, action='store_true')

    args = parser.parse_args()
    extra_args = json.loads(args.parameters)

    # Final runs carry an extra tag and never reuse the previous task id.
    tags = ["TrainGenerator", "Final"] if args.final else ["TrainGenerator"]
    task = Task.init(
        project_name="Synthetic data generators",
        task_name="train_generator",
        tags=tags,
        reuse_last_task_id=not args.final,
    )

    main(task, args.dataset, args.generator, extra_args)
    task.flush(wait_for_uploads=True)

    # Let server parse our uploads
    sleep(20.0)

    # Optional follow-up step, run against the task that was just flushed.
    if args.generate:
        generate_data.main(task, task.task_id, {})
예제 #6
0
import generate_data
import os
import uuid

filename = 'py-script-generated'
REPEAT = 100

# Each pass: build a unique name, call generate_data.main with it, copy
# "<name>.json" to the bucket with gsutil, delete the local file, report.
for i in range(REPEAT):
    unique_filename = '{}{}'.format(filename, uuid.uuid4())
    generate_data.main(unique_filename)
    # NOTE(review): with "2>&1 > /dev/null" only stdout is discarded; stderr
    # still reaches the terminal.  The exit status of gsutil is not checked,
    # so the local file is removed even if the copy failed.
    os.system(
        'gsutil cp {}.json gs://stephwangstarter-files-source-1574201370 '
        '2>&1 > /dev/null'.format(unique_filename))
    os.remove('{}.json'.format(unique_filename))
    print("file: {} iteration: {}".format(unique_filename, i + 1))
예제 #7
0
import flask
import hashlib
import json
import logging
import os
import sqlite3
import subprocess
import sys
from werkzeug import debug

# Generate test data when running locally
data_dir = os.path.join(os.path.dirname(__file__), 'data')
if not os.path.exists(data_dir):
    import generate_data
    os.mkdir(data_dir)
    generate_data.main(data_dir, 'dummy-password', 'dummy-proof', 'dummy-plans')

# Load the start-up files through context managers so every handle is closed
# as soon as its content has been read, instead of being left to the garbage
# collector.
with open(os.path.join(data_dir, 'secrets.json')) as _fh:
    secrets = json.load(_fh)
# index.html is resolved against the current working directory.
with open('index.html') as _fh:
    index_html = _fh.read()
app = flask.Flask(__name__)

# Turn on backtraces, but turn off code execution (that'd be an easy level!)
app.config['PROPAGATE_EXCEPTIONS'] = True
app.wsgi_app = debug.DebuggedApplication(app.wsgi_app, evalex=False)

app.logger.addHandler(logging.StreamHandler(sys.stderr))
# use persistent entropy file for secret_key
with open(os.path.join(data_dir, 'entropy.dat')) as _fh:
    app.secret_key = _fh.read()

# Allow setting url_root if needed
try:
예제 #8
0
import generate_data, double_size, cards

# Pipeline stages in execution order: (banner, module, entry-point name).
# The entry point is looked up only when its stage is reached.
for banner, stage_module, entry_point in (
        ('Generate Data', generate_data, 'main'),
        ('Create Card Images', cards, 'do_the_thing'),
        ('Resize All Images', double_size, 'main'),
):
    print(banner)
    getattr(stage_module, entry_point)()
예제 #9
0
    os.mkdir(database_path_current)

# Keep a copy of the parameter file inside this run's directory.
shutil.copyfile("./parameter.py",
                "{}/parameter.py".format(database_path_current))

# Download images for every class that has no directory yet.
for i_class in range(num_classes):
    if os.path.exists("{}/{}".format(database_path, classes[i_class])):
        continue
    download.main(classes[i_class])

# Cross validation: folds are numbered 1 .. parameter.cross_num
# (original note: 4-fold, 1:3 x 4).
for i_cross_num in range(1, 1 + parameter.cross_num):
    print(i_cross_num)
    database_path_current_cross = "{}/cross{}".format(
        database_path_current, i_cross_num)
    if not os.path.exists(database_path_current_cross):
        os.mkdir(database_path_current_cross)

    # Data generation is skipped when this fold's animal_.npy already exists.
    fold_data_present = os.path.exists(
        database_path_current_cross + "/animal_.npy")
    if not fold_data_present:
        print("===== Generate Data =====")
        generate_data.main(i_cross_num)

    animal_cnn.main(i_cross_num)

calc_mean.main()

# Elapsed wall-clock time since start_time is handed to the notifier.
calc_time = time.time() - start_time
notify_line.main(calc_time)

print("=== Main Script Finished ===")
예제 #10
0
                ET.SubElement(row, self.headers[col_1]).text = str(i)
                ET.SubElement(row, str(aggr)+'_'+self.headers[col_2]).text = str(result[i])


        tree = ET.ElementTree(root)
        tree.write(file)

        with open(file, 'r') as original:
            data = original.read()
        with open(file, 'w') as modified:
            modified.write('<?xml version="1.0" encoding="UTF-8"?>\n' + data)



# Ask (prompt is in Russian) how many data objects to generate and pass that
# count to generate_data.main; int() raises ValueError on non-numeric input.
rows=input('Сколько требуется сгенерировать объектов данных: ')
generate_data.main(int(rows))

# Enable INFO-level logging and record when the aggregation phase starts.
logging.basicConfig(level=logging.INFO)
logging.info('Start aggregation. Start time = %s', str(datetime.now()))

# Show the available aggregates and read the user's choice as typed.
print('Cписок доступных аггрегатов: count, sum, avg, min, max')
agg=input('Введите аггрегат  ')

# List the data set's fields as (index, header) tuples so the user can answer
# the next two prompts with a field number.
print('В наборе данных имеются следующие поля:')
for i in enumerate(Data().headers):
    print(i)
# c_1: number of the field the calculation is made relative to (prompt
# example: 6 is "type").  Kept as the raw string returned by input().
print('Напишите номер поля относительно которого вы хотите провести расчеты. Например, 6 - это type')
c_1=input('Ваш выбор = ')
# c_2: number of the field the calculation is performed over (prompt example:
# 4 is "duration").  Also kept as a raw string.
# NOTE(review): conversion of c_1 / c_2 to int presumably happens further
# down the script -- confirm.
print('Напишите номер поля, над которым вы хотите провести расчеты. Например, 4 - это duration')
c_2=input('Ваш выбор = ')
예제 #11
0
 def setUpClass(self):
     """Run the parent class's ``setUpClass`` and then ``generate_data.main()``.

     NOTE(review): no ``@classmethod`` decorator is visible above this
     definition in the excerpt; unittest requires ``setUpClass`` to be a
     class method -- confirm the decorator is present.
     """
     super(ViewsTest, self).setUpClass()
     generate_data.main()
예제 #12
0
 def setUpClass(self):
     """Run the parent class's ``setUpClass`` and then ``generate_data.main()``.

     NOTE(review): no ``@classmethod`` decorator is visible above this
     definition in the excerpt; unittest requires ``setUpClass`` to be a
     class method -- confirm the decorator is present.
     """
     super(QueryTest, self).setUpClass()
     generate_data.main()