Example #1
def refine_corpus(corpus, rule_path, output=None, thread=None):
    """
    Clean up the given corpus according to the rules defined in the files.
    This method utilizes multithreading to accelerate the process.

    Arguments:
        corpus(str): Path to the corpus file.
        rule_path(str): Path to where "parentheses.tsv" and 
            "refine_list.tsv" are.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    if output is None:
        output = corpus[:-4] + "_cleaned.txt"
    if not rule_path.endswith("/"):
        rule_path += "/"

    # Load rule files
    file_p = rule_path + "parentheses.tsv"
    file_r = rule_path + "refine_list.tsv"
    parentheses = load_rules(file_p)
    refine_list = load_rules(file_r)

    # Acquire the corpus (skip first line)
    raw_data = readlines(corpus)

    # Threading
    param = (parentheses, refine_list)
    result = generic_threading(thread, raw_data, corpus_cleanup, param)

    # Write all result to file
    write_to_file(output, result)
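A minimal usage sketch; the paths and thread count below are illustrative assumptions, not values from the original project:

# Hypothetical invocation of refine_corpus; the rule directory must contain
# parentheses.tsv and refine_list.tsv as described in the docstring.
refine_corpus(
    corpus="data/corpus.txt",
    rule_path="src/refine_rules",   # trailing "/" is appended automatically
    output=None,                    # defaults to data/corpus_cleaned.txt
    thread=4,
)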
Example #2
    def create_repository(workspace, bare=False):
        if not os.path.exists(workspace):
            os.mkdir(workspace)
        os.chdir(workspace)

        if not bare:
            os.mkdir(Repository.GIT_DIR)
            os.chdir(Repository.GIT_DIR)

        for new_dir in Repository.INIT_DIR:
            os.mkdir(new_dir)

        for file_and_content in Repository.INIT_FILE:
            file_name = file_and_content[0]
            content = file_and_content[1]
            write_to_file(file_name, content)

        init_config_dict = {
            'core': {
                'repositoryformatversion': '0',
                'filemode': 'true',
                'bare': str(bare).lower(),
                'logallrefupdates': 'true',
            }
        }

        content = Config.create_config(init_config_dict)
        write_to_file('config', content)
Example #3
def extract_vocabularies(corpus, rule, output=None, thread=None):
    """
    Extract vocabularies from the corpus; additional rules to achieve
    purer vocabularies can be defined in src/refine_rules/voc_cleanup.tsv.

    Arguments:
        corpus(str): Path to the corpus file.
        rule(str): Path to the processing rule file.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    if output is None:
        output = corpus[:-4] + "_vocabulary_list.json"

    # Load rules
    rules = load_rules(rule)

    # Acquire the corpus
    raw_data = readlines(corpus, limit=None)

    # Threading (TO-BE-IMPLEMENTED)
    # param = (rules, "SPLIT_WORDS")
    # generic_threading(thread, raw_data, punctuation_cleanup, param)
    result = punctuation_cleanup(0, raw_data, rules, mode='SPLIT_WORDS')

    # Counting occurrences
    print("Counting occurrences...")
    voc_list = Counter(result)

    # Save vocabulary to file
    write_to_file(output, voc_list)
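A short usage sketch; the file names are assumptions and the output path defaults to <corpus>_vocabulary_list.json:

# Hypothetical invocation; the object written to file is a collections.Counter
# mapping each extracted token to its occurrence count.
extract_vocabularies("data/corpus.txt", "src/refine_rules/voc_cleanup.tsv")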
Example #4
def parse_subwords(file):
    """
    Parse subword mapping to dictionary.

    Args:
        file(str): Path to the subword mapping file.
                   format: <[WORD]>S1,S2,...
    """
    # Load subwords
    raw_data = readlines(file, limit=None)

    # Parsing information
    mentions, subwords = list(), list()
    for itr in raw_data:
        index = itr.find(">")
        mentions.append(itr[1:index])
        subwords.append(itr[index + 2:].split(","))

    # Create dictionary mapping mentions to their subwords
    dictionary = dict(zip(mentions, subwords))

    write_to_file("data/subwords.json", dictionary)

    # Subword pool for subword embedding
    # subword_pool = np.unique(list(chain.from_iterable(subwords)))
    subword_pool = dict(Counter(list(chain.from_iterable(subwords))))
    # print(subword_pool)
    print("Raw number of subwords: {:8d}".format(len(subword_pool)))
    write_to_file("data/subword_pool.json", subword_pool)
Example #5
    def process_output(self, snapshot_num):
        for output_num, output_folder in enumerate(self.output_folders):
            uncert_path = os.path.join(output_folder, 'uncert_{0}.txt'.format(snapshot_num))
            label_path = os.path.join(output_folder, 'label_{0}.txt'.format(snapshot_num))

            utils.write_to_file(uncert_path, self.uncertainty[:, output_num])
            utils.write_to_file(label_path, self.correct)
Example #6
 def test_add_all(self):
     paths_contents = [('1.txt', '1\n'),
                       (os.path.join('dir', '2.txt'), '2\n')]
     for path, content in paths_contents:
         write_to_file(path, content)
     Command.cmd_add('.')
     self._check_blob_and_index(*paths_contents)
Example #7
def create_and_upload(template_s3_url, local_param_file, local_userinput_file,
                      s3bucket):
    '''
    Main function - parse cloudformation template from S3URL and generate
    parameter.json and parameter.yaml files.
    '''
    try:
        result = cfn_client.validate_template(TemplateURL=template_s3_url)

        # create parameter.json structure
        param_obj = result['Parameters']
        for obj in param_obj:
            for key in list(obj.keys()):
                if key != 'ParameterKey':
                    obj.pop(key)
            obj['ParameterValue'] = "{{ " + obj['ParameterKey'] + " }}"

        param_str = json.dumps(param_obj, indent=2, separators=(',', ': '))

        replacements = {"\"{{": "{{", "}}\"": "}}"}
        cfn_params = search_and_replace(param_str, replacements)

        userinput_content = generate_userinput_params(param_obj)
        #cfn_params = search_and_replace(json.dumps(param_obj, indent=2, separators=(',', ': ')))

        # generate user_input yaml parameter file and upload to s3
        utils.write_to_file('/tmp/' + local_userinput_file, userinput_content)
        utils.upload_to_s3(s3bucket, '/tmp/' + local_userinput_file,
                           local_userinput_file)

        # generate parameter.json file and upload to s3
        utils.write_to_file('/tmp/' + local_param_file, cfn_params)
        utils.upload_to_s3(s3bucket, '/tmp/' + local_param_file,
                           local_param_file)

    except Exception as e:
        print(e)
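A hedged invocation sketch; the bucket name, template URL, and file names are placeholders, and cfn_client plus the utils helpers are assumed to be configured elsewhere in the module:

# Hypothetical call site for create_and_upload.
create_and_upload(
    template_s3_url="https://s3.amazonaws.com/example-bucket/template.yaml",
    local_param_file="parameter.json",
    local_userinput_file="user_input.yaml",
    s3bucket="example-bucket",
)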
Example #8
    def create_repository(workspace, bare=False):
        if not os.path.exists(workspace):
            os.mkdir(workspace)
        os.chdir(workspace)

        if not bare:
            os.mkdir(Repository.GIT_DIR)
            os.chdir(Repository.GIT_DIR)

        for new_dir in Repository.INIT_DIR:
            os.mkdir(new_dir)

        for file_and_content in Repository.INIT_FILE:
            file_name = file_and_content[0]
            content = file_and_content[1]
            write_to_file(file_name, content)

        init_config_dict = {
            "core": {
                "repositoryformatversion": "0",
                "filemode": "true",
                "bare": str(bare).lower(),
                "logallrefupdates": "true",
            }
        }

        content = Config.create_config(init_config_dict)
        write_to_file("config", content)
Example #9
    def commit(self, msg, ref="HEAD"):
        cur_tree = self.index.do_commit(self.workspace)
        branch_name = read_file(os.path.join(self.workspace, ".git", "HEAD")).strip("\n").rsplit("/", 1)[-1]
        ref_path = os.path.join(self.workspace, ".git", "refs", "heads", branch_name)
        parent_sha1 = None
        if os.path.exists(ref_path):
            parent_sha1 = read_file(ref_path)
        committer_name = self.config.config_dict["user"]["name"]
        committer_email = "<%s>" % (self.config.config_dict["user"]["email"])
        commit_time = int(time.time())

        # TO FIX
        commit_timezone = time.strftime("%z", time.gmtime())

        commit = Commit(
            self.workspace,
            tree_sha1=cur_tree.sha1,
            parent_sha1=parent_sha1,
            name=committer_name,
            email=committer_email,
            timestamp=commit_time,
            timezone=commit_timezone,
            msg=msg,
        )
        write_object_to_file(commit.path, commit.content)
        write_to_file(ref_path, commit.sha1)
Example #10
 def setUp(self):
     self.workspace = 'test_commit'
     Command.cmd_init(self.workspace)
     self.path = '1.txt'
     self.content = '1\n'
     write_to_file(self.path, self.content)
     Command.cmd_add(self.path)
Example #11
 def process_deaths(self):
     """
     Processes the global and US deaths in-memory records
     """
     logger = logging.getLogger("Deaths")
     csse_handler_global = CSSEGISandData(logger, USFileType=False)
     global_deaths_gps_data = csse_handler_global.parse_csv_file_contents(
         self.global_deaths_dataset)
     self.date_keys_sanity_check(csse_handler_global.date_keys)
     # The header of the US file has the dates start at offset 12
     # perhaps we should validate this never changes, or the data will
     # be out of sync
     csse_handler_us = CSSEGISandData(logger,
                                      USFileType=True,
                                      offset_dates=12)
     us_deaths_gps_data = csse_handler_us.parse_csv_file_contents(
         self.us_deaths_dataset)
     self.date_keys_sanity_check(csse_handler_us.date_keys)
     deaths_gps_data = utils.merge_dict(global_deaths_gps_data,
                                        us_deaths_gps_data)
     utils.write_to_file(
         "data/deaths.json",
         csse_handler_global.generate_globe_json_string(
             deaths_gps_data, self.global_population_dataset,
             self.global_population))
Example #12
def preliminary_cleanup(corpus, rule, output=None, thread=None, limit=None):
    """
    Preliminarily clean up the corpus to make it easier for further
    processing. This method can be used to correct missing spaces after
    punctuation; any other customized rules can be added to the rule file
    (see punctuation_cleanup in utils for the formatting of the rules).

    Arguments:
        corpus(str): Path to the corpus file.
        rule(str): Path to the processing rule file.
        thread(int): Number of threads to use.
        output(str): Path to the output file.
    """
    # output name
    if output is None:
        output = corpus[:-4] + "_preprocessed.tsv"

    # Load rules
    rules = load_rules(rule)
    # Load data
    raw_data = readlines(corpus, limit=limit, skip=True)

    # Threading
    param = (rules, "PRELIMINARY")
    result = generic_threading(thread, raw_data, punctuation_cleanup, param)

    # Write result to file
    write_to_file(output, result)
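A minimal sketch of a call, assuming a tab-separated rule file in the format expected by punctuation_cleanup; the paths, thread count, and limit are illustrative:

# Hypothetical invocation; limit=1000 only reads the first 1000 lines.
preliminary_cleanup(
    corpus="data/corpus.txt",
    rule="src/refine_rules/preliminary.tsv",
    thread=8,
    limit=1000,
)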
Example #13
def abd(**kwargs):
    global command
    import utils
    if 'simple_update' not in kwargs:
        hs, scs, cls = kwargs['heuristic_search'], kwargs['set_cover_search'], kwargs['clause_level_search']
        special_search = hs or scs or cls
    else:
        special_search = False    
    if not special_search:
        options = [clingo,bk,ex,abdinp,exmpl_constr,'1','--asp09'] 
    else:
        options = [clingo,bk,ex,abdinp,exmpl_constr,'1','--asp09']    
        covfile = gl.example_coverage_constr  # use the hard example coverage constraints
        covconstr = gl.example_constraints_cover
        covcontent = '\n'.join(covconstr)
        utils.write_to_file(covfile,covcontent)
    command = ' '.join(options)
    out = os.popen(command).read().split('.')
    

    #out = [x.strip() for x in out if not x.strip() == '']
    
    out = filter(lambda x: 'OPTIMUM FOUND' not in x,out)
    ##out = out[len(out)-1].split('.') # get the last one, which is the optimum
    out = filter(lambda x: not x.strip() in '', [x.strip() for x in out])

    if special_search: # re-write the soft constraints
        covfile = gl.example_coverage_constr  # use the hard example coverage constraints
        covconstr = gl.heuristic_example_constraints_cover
        covcontent = '\n'.join(covconstr)
        utils.write_to_file(covfile,covcontent)
    return out
Example #14
 def setUp(self):
     self.workspace = 'test_diff'
     Command.cmd_init(self.workspace)
     self.old_content = '''
         The Way that can be told of is not the eternal Way;
         The name that can be named is not the eternal name.
         The Nameless is the origin of Heaven and Earth;
         The Named is the mother of all things.
         Therefore let there always be non-being,
           so we may see their subtlety,
         And let there always be being,
           so we may see their outcome.
         The two are the same,
         But after they are produced,
           they have different names.
     '''
     self.new_content = '''
         The Nameless is the origin of Heaven and Earth;
         The named is the mother of all things.
         
         Therefore let there always be non-being,
           so we may see their subtlety,
         And let there always be being,
           so we may see their outcome.
         The two are the same,
         But after they are produced,
           they have different names.
         They both may be called deep and profound.
         Deeper and more profound,
         The door of all subtleties!
     '''
     self.file_list = [('1.txt', self.old_content), ('2.txt', self.old_content)]
     for path, content in self.file_list:
         write_to_file(path, content)
         Command.cmd_add(path)
Example #15
def LOO(segmentation_list, save_path, model_save_prefix="LOO_"): 

	# for each in segmentation list, omit and train separate model 

	# 	run the rigid registration

	# 	run the deformable registration 

	# 	save the model parameters

	for loo_id, (seg_path, identifier) in enumerate(segmentation_list): 

		identifier = identifier.replace(" ", "")

		print "executing LOO model with ", identifier, " left out"
		print "---"*10

		rest = segmentation_list[:loo_id] + segmentation_list[loo_id+1:]

		ss, Rs, ts, _, _, _, _, mean_shape = initial_rigid_registration(rest, save_path)

		Gs, Ws, Ps, corresponded_targets, disparities = deformable_registration(rest, save_path, Rs, ss, ts, mean_shape, apply_scale=False)

		write_to_file(model_save_prefix + identifier, (mean_shape, Gs, Ws, Ps, corresponded_targets, disparities))

	return 
Example #16
 def create_repository(workspace, bare=False):
     if not os.path.exists(workspace): 
         os.mkdir(workspace)
     os.chdir(workspace)
             
     if not bare:
         os.mkdir(Repository.GIT_DIR)
         os.chdir(Repository.GIT_DIR)
             
     for new_dir in Repository.INIT_DIR:
         os.mkdir(new_dir)
             
     for file_and_content in Repository.INIT_FILE:
         file_name = file_and_content[0]  
         content = file_and_content[1]
         write_to_file(file_name, content)
         
         
     init_config_dict = {
         'core': {
             'repositoryformatversion' : '0',
             'filemode' : 'true',
             'bare' : str(bare).lower(),
             'logallrefupdates' : 'true',
         }
     }
         
     content = Config.create_config(init_config_dict)
     write_to_file('config', content)
Example #17
def solve_from_file(input_file, output_directory, params=[]):
    print('Processing', input_file)

    basename, filename = os.path.split(input_file)
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    output_file = utils.input_to_output(input_file, output_directory)
    optimal_tracker = output_file + ".optimal"

    existing_optimal = ((not os.path.exists(optimal_tracker)) or (utils.read_file(optimal_tracker)[0][0] == "True"))
    if os.path.exists(output_file) and existing_optimal:
        if not os.path.exists(optimal_tracker):
            utils.write_to_file(output_file + ".optimal", str(True))
        print("Skipping, already solved optimal")
    elif os.path.exists(output_file) and False:
        print("Skipping non-optimal")
    else:
        input_data = utils.read_file(input_file)
        num_of_locations, num_houses, list_locations, list_houses, starting_car_location, adjacency_matrix = data_parser(input_data)
        sol = solve(list_locations, list_houses, starting_car_location, adjacency_matrix, params=params)
        if sol:
            car_path, drop_offs, is_optimal = sol
            convertToFile(car_path, drop_offs, output_file, list_locations)
            utils.write_to_file(output_file + ".optimal", str(is_optimal))
        else:
            print("no feasible solution")
Example #18
def main():
    start = time.time()
    # # load data.
    train_data = np.load(PATH_TRAIN_DATA)
    train_label = np.load(PATH_TRAIN_LABEL)
    test_data = np.load(PATH_TEST_DATA)
    test_label = np.load(PATH_TEST_LABEL)

    # # shuffle the data
    train_data, train_label = shuffle(train_data, train_label)
    # test_data, test_label = shuffle(test_data, test_label)

    # # image preprocessing.
    train_data = preprocessing.scale(train_data)
    test_data = preprocessing.scale(test_data)

    # # range label in [0,2]
    train_label += 1
    test_label += 1

    # # use a fraction of training data
    train_data = train_data[0:10000, :]
    train_label = train_label[0:10000]

    # # prepare label for one-vs-rest
    train_labels = one_vs_rest_labels(train_label)

    print('Training...')
    svms = []
    for cls in range(NUM_CLS):
        print('Training for class {}'.format(cls))
        clf = svm.SVC(kernel='rbf', C=2, probability=True)
        clf.fit(train_data, train_labels[cls])
        svms.append(clf)

    # # --------------- new version: svms.predict_proba --------------- # #
    print('Testing...')
    predicts = []
    for cls in range(NUM_CLS):
        cls_predict = svms[cls].predict_proba(test_data)
        predicts.append(cls_predict[:, 1])
    # # --------------- new version: svms.predict_proba --------------- # #

    # # --------------- old version: svms.predict --------------- # #
    # print('Testing...')
    # predicts = []
    # for cls in range(NUM_CLS):
    #     cls_predict = svms[cls].predict(test_data)
    #     predicts.append(cls_predict)
    # # --------------- old version: svms.predict --------------- # #

    predicts = np.stack(predicts, axis=1)
    predict = np.argmax(predicts, axis=1)
    n_accuracy = np.where(np.equal(predict, test_label))[0]
    accuracy = n_accuracy.shape[0] / test_data.shape[0]
    print('final accuracy {}'.format(accuracy))

    cls_time = time.time() - start
    write_to_file('./logs/p1_accuracy.txt', '{}\n'.format(accuracy), False)
    write_to_file('./logs/p1_time.txt', '{}\n'.format(cls_time), True)
Example #19
 def setUp(self):
     self.workspace = "test_rm"
     Command.cmd_init(self.workspace)
     self.path = "1.txt"
     content = "1\n"
     write_to_file(self.path, content)
     Command.cmd_add(self.path)
Example #20
def preprocess_text(inputfile, outputfile):
    data = utils.read_file(inputfile)
    words = tokenize(data)
    meaningful_words = remove_stopwords(words)
    stemmed_words = stem_words(meaningful_words)
    stemmed = ' '.join(stemmed_words)
    utils.write_to_file(outputfile, stemmed)
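A usage sketch, assuming plain-text input and output paths (the file names are illustrative):

# Hypothetical paths; the output file receives the stemmed, stopword-free text.
preprocess_text("reviews/raw_review.txt", "reviews/review_stemmed.txt")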
Example #21
def test():
    print("testing...")
    if not args.use_gpu:
        state_dict = torch.load(args.save_path, map_location='cpu')
    else:
        state_dict = torch.load(args.save_path)
    ner.load_state_dict(state_dict)
    ner.eval()
    test_data = DGTestDataset(file_path=args.test_data, maxlen=args.max_len)
    test_dl = DataLoader(test_data, batch_size=args.batch_size, num_workers=2)

    preds = []
    lengths = []
    sent2ids = []
    print("===========testing===========")
    with torch.no_grad():
        for sent2id, length in test_dl:
            sent2ids.append(sent2id)
            if args.use_gpu:
                sent2id = sent2id.to(device)
            # shape:[batch, seqlen, len(tag2id)]
            # pred = ner(sent2id)
            # shape:[batch, seqlen]
            # pred = torch.argmax(pred, dim=-1)
            pred = ner(sent2id)

            # preds.append(pred.cpu().numpy())
            preds.append(pred)
            lengths.append(length.numpy())

    preds = np.concatenate(preds, axis=0)
    lengths = np.concatenate(lengths, axis=0)
    sent2ids = np.concatenate(sent2ids, axis=0)

    write_to_file(preds, lengths, sent2ids, args.result_file, tag2id)
Example #22
def save_graph(G, size, input_directory):
    num_loc = size
    num_home = size // 2
    # generate loc names
    name_set = set()
    while len(name_set) < num_loc:
        name_set.add(''.join([
            random.choice(string.ascii_letters + string.digits)
            for i in range(20)
        ]))
    name_list = list(name_set)
    loc_list = [i for i in range(num_loc)]
    home_list = [i for i in range(num_loc)]
    random.shuffle(loc_list)
    random.shuffle(home_list)
    home_list = home_list[:num_home]
    loc_name = [name_list[i] for i in loc_list]
    home_name = [name_list[i] for i in home_list]
    start_loc = loc_name[random.randint(0, num_loc - 1)]

    data = ''
    data += str(num_loc) + '\n'
    data += str(num_home) + '\n'
    data += ' '.join(loc_name) + '\n'
    data += ' '.join(home_name) + '\n'
    data += start_loc + '\n'

    mat = nx.convert_matrix.to_numpy_matrix(G)
    data += parse_mat(mat)

    utils.write_to_file(input_directory + '/' + str(size) + '.in', data)
Example #23
 def test_status_untracked_files(self):
     path, content = ('1.txt', '1\n')
     write_to_file(path, content)
     repo = Repository()
     untracked_files = repo.get_untracked_files()
     self.assertEqual(untracked_files, ['1.txt'])
     Command.cmd_status()
Example #24
 def switch_branch(self, name):
     if not self._check_branch_exists(name):
         print "error: branch '%s' did not match any branches known to git." % (name)
         exit(1)
     write_to_file(HEAD_PATH, 'ref: refs/heads/%s' % name)
         
     
Example #25
def validate_all_outputs(input_directory, output_directory, params=[]):
    input_files = utils.get_files_with_extension(input_directory, '.in')
    output_files = utils.get_files_with_extension(output_directory, '.out')
    for file in output_files:
        print(file)

    output_directory += '/'
    all_results = []
    string = ''
    for input_file in input_files:
        basename, filename = os.path.split(input_file)
        if '200' in filename:
            continue
        output_file = utils.input_to_output(input_file, output_directory)
        print(input_file, output_file)
        if output_file not in output_files:
            print(f'No corresponding .out file for {input_file}')
            results = (None, None,
                       f'No corresponding .out file for {input_file}')
        else:
            results = validate_output(input_file, output_file, params=params)
        string += filename + " " + results[2] + "\n"
        all_results.append((input_file, results))
    utils.write_to_file('validations.out', string)
    return all_results
Example #26
def abd(**kwargs):
    global command
    import utils
    if 'simple_update' not in kwargs:
        hs, scs, cls = kwargs['heuristic_search'], kwargs['set_cover_search'], kwargs['clause_level_search']
        special_search = hs or scs or cls
    else:
        special_search = False    
    if not special_search:
        options = [clingo,bk,ex,abdinp,exmpl_constr,'1','--asp09'] 
    else:
        options = [clingo,bk,ex,abdinp,exmpl_constr,'1','--asp09']    
        covfile = gl.example_coverage_constr  # use the hard example coverage constraints
        covconstr = gl.example_constraints_cover
        covcontent = '\n'.join(covconstr)
        utils.write_to_file(covfile,covcontent)
    command = ' '.join(options)
    out = os.popen(command).read().split('.')
    
    #out = cmd(options)
    #out = out.split('.')
    out = [x.strip() for x in out if not x.strip() == '']
    if special_search: # re-write the soft constraints
        covfile = gl.example_coverage_constr  # use the hard example coverage constraints
        covconstr = gl.heuristic_example_constraints_cover
        covcontent = '\n'.join(covconstr)
        utils.write_to_file(covfile,covcontent)
    return out
Example #27
def solve_from_file(input_file, output_directory, params=[]):
    # print('Processing', input_file)

    input_data = utils.read_file(input_file)
    number_of_kingdoms, list_of_kingdom_names, starting_kingdom, adjacency_matrix = data_parser(
        input_data)
    closed_walk, conquered_kingdoms = solve(list_of_kingdom_names,
                                            starting_kingdom,
                                            adjacency_matrix,
                                            params=params)

    if closed_walk == "Error":
        print("Error")
    else:
        basename, filename = os.path.split(input_file)
        output_filename = utils.input_to_output(filename)
        output_file = f'{output_directory}/{output_filename}'
        if not os.path.exists(output_directory):
            os.makedirs(output_directory)
        utils.write_data_to_file(output_file, closed_walk, ' ')
        utils.write_to_file(output_file, '\n', append=True)
        utils.write_data_to_file(output_file,
                                 conquered_kingdoms,
                                 ' ',
                                 append=True)
Example #28
 def setUp(self):
     self.workspace = 'test_commit'
     Command.cmd_init(self.workspace)
     self.path = '1.txt'
     self.content = '1\n'
     write_to_file(self.path, self.content)
     Command.cmd_add(self.path)
Example #29
def auto_download(researcher_papers_location=None,
                  researchers_file_location=None):
    # researcher_papers_location = "researchers_to_papers.p"

    if os.path.isfile(researcher_papers_location):
        researcher_papers = pickle.load(open(researcher_papers_location, 'rb'))
    else:
        researchers = utils.read_file(file_location=researchers_file_location,
                                      sep="|").split("|")
        researchers = [name.title() for name in researchers
                       ]  # Convert "David HSU" to "David Hsu"
        researcher_papers = load_papers_title(researchers)

    for researcher, papers in list(researcher_papers.items()):  # copy, so entries can be deleted while iterating
        papers_DIR = os.path.join("papers", researcher.replace(" ", "_"), "")
        utils.is_folder_exists_create_otherwise(papers_DIR)

        try:
            arxiv_crawler.download_list_of_papers_serial(titles=papers,
                                                         dirname=papers_DIR,
                                                         my_api_key=my_api_key,
                                                         my_cse_id=my_cse_id)
            del researcher_papers[researcher]
        except Exception as e:
            completed_up_to = int(utils.read_file("index_marker.txt"))
            researcher_papers[researcher] = researcher_papers[researcher][
                completed_up_to:]
            pickle.dump(researcher_papers,
                        open(researcher_papers_location, "wb"))
            utils.write_to_file("researchers1.txt",
                                "\n".join(researcher_papers.keys()))
            logging.error(str(e))
            break
Example #30
 def test_status_untracked_files(self):
     path, content = ('1.txt', '1\n')
     write_to_file(path, content)
     repo = Repository()
     untracked_files = repo.get_untracked_files()
     self.assertEqual(untracked_files, ['1.txt'])
     Command.cmd_status()
Example #31
def main():
    # parse args
    ap = argparse.ArgumentParser()
    ap.add_argument('-i', '--input', type=str, required=True, help='path to input video file')
    ap.add_argument('-o', '--output', type=str, default='./', help='path to output directory')
    args = vars(ap.parse_args())
    in_vid = args['input']
    out_dir = args['output']
    video_name = in_vid.split('/')[-1].split('.')[0]

    # initialization
    model = Model(winname)
    view = View(winname)
    controller = Controller(winname, view, model)
    cap = utils.load_video(in_vid)
    frame_id = 0

    cv2.namedWindow(winname)

    # initial frame for selecting roi
    success, orig = cap.read()
    if not success:
        return

    # create dir to store output roi
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    model.curr_frame = orig.copy()
    view.freeze_frame(controller, model)
    utils.tracker_init(model, trackers, tracking, files, out_dir, video_name, frame_id)

    # play video
    while cap.isOpened():
        success, orig = cap.read()
        if not success:
            break
        model.curr_frame = orig.copy()
        frame_id += 1

        rois = utils.track(winname, trackers, model)
        view.show(model)

        # MAC: SPACE (32) ENTER (13) DELETE (8) ESC (27)
        key = cv2.waitKey(1)
        if key == 32:
            view.freeze_frame(controller, model)
            cv2.setMouseCallback(winname, controller.empty_click_event)
            utils.tracker_init(model, trackers, tracking, files, out_dir, video_name, frame_id)
        elif key == ord('q'):
            break

        # write rois to txt file
        utils.write_to_file(files, frame_id, rois)

    # clean up
    cv2.destroyAllWindows()
    for f in files:
        f.close()
Example #32
 def test_diff_cached(self):
     Command.cmd_commit('first ci')
     write_to_file(self.file_list[0][0], self.new_content)
     Command.cmd_rm(self.file_list[1][0])
     new_path = '3.txt'
     write_to_file(new_path, self.new_content)
     Command.cmd_add('.')
     Command.cmd_diff(True, False)
Example #33
 def test_diff_cached(self):
     Command.cmd_commit('first ci')
     write_to_file(self.file_list[0][0], self.new_content)
     Command.cmd_rm(self.file_list[1][0])
     new_path = '3.txt'
     write_to_file(new_path, self.new_content)
     Command.cmd_add('.')
     Command.cmd_diff(True, False)
Example #34
    def run_build_script(self, build_dir, template):
        script_name = 'build.sh'
        script_path = join(build_dir, script_name)
        print('Writing build script to %s' % script_path)
        write_to_file(script_path, template)

        print('Starting build')
        subprocess.check_call(['bash', script_name], cwd=build_dir)
Example #35
def main():
    testfile_path = args.file
    tokens = tokenize_file(testfile_path)
    recognized = recognize_file(testfile_path)
    print(recognized)
    res_filename = sys.modules[__name__].__file__.rpartition(
        ".")[0] + "_result.txt"
    write_to_file(res_filename, "\n".join(tokens))
Example #36
def preprocess_gensim(inputfile, outputfile):
    data = utils.read_file(inputfile)
    data_words = gensim.utils.simple_preprocess(data, deacc=True)
    data_words_nostop = bow_service.remove_stopwords([data_words])
    data_lemmatized = bow_service.lemmatization(
        data_words_nostop, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])
    # print ' '.join(data_lemmatized[0])
    utils.write_to_file(outputfile, ' '.join(data_lemmatized[0]))
Example #37
def extract_mentions(path, trim=True):
    """
    """
    entities = merge_dict(path, trim=trim)

    mentions = list(entities.keys())
    print(len(mentions))
    write_to_file("mention_list.txt", mentions)
    pass
Example #38
    def test_append_to_file(self):
        test_data = "some wicked cool stuff"
        test_data2 = "even more test data"

        utils.write_to_file(self.test_file, test_data)
        utils.append_to_file(self.test_file, test_data2)

        with open(self.test_file, "r") as f:
            self.assertEqual(f.read(), test_data + test_data2)
Example #39
def edit_question_by_id(question_id, title, message):
    data = []
    questions = utils.open_file(questions_data, QUESTION_HEADER)
    for row in questions:
        if row['id'] == question_id:
            row['title'] = title
            row['message'] = message
        data.append(row)
    utils.write_to_file(questions_data, data, QUESTION_HEADER)
Example #40
def process_app_data(metadata, cmd_string, input_filepath, stdout, stderr, return_code):
  """
  <Purpose>
    Execute the given command and redirect input (as necessary).

  <Arguments>
    metadata:
      The metadata dictionary.

    cmd_string:
      A string of the command that was provided to execute the build or test 
      (e.g. "python test.py", "make").

    input_filepath:
      The filepath of the file from which stdin should be read; will be None if
      no explicit file is specified.

    stdout:
      A string representing the stdout from the command that was run.

    stderr:
      A string representing the stderr from the command that was run.

    return_code:
      An integer representing the return code from the command that was run.

  <Exceptions>
    TBD.

  <Return>
    None.
  """

  metadata['application']['command'] = cmd_string
  metadata['application']['return_code'] = return_code

  cwd = os.getcwd()

  # For the stdin, stdout and stderr, write each to a file, hash it, and store
  # the hash and filepath in the metadata
  if input_filepath:
    saved_input_path = os.path.join(cwd,"in")
    shutil.copyfile(input_filepath, saved_input_path)
    metadata['application']['input_hash'] = utils.get_hash(saved_input_path)
    metadata['application']['input_path'] = saved_input_path
  else:
    metadata['application']['input_hash'] = None
    metadata['application']['input_path'] = None

  saved_output_path = os.path.join(cwd,"out")
  utils.write_to_file(stdout, saved_output_path)
  metadata['application']['output_hash'] = utils.get_hash(saved_output_path)
  metadata['application']['output_path'] = saved_output_path
  saved_err_path = os.path.join(cwd,"err")
  utils.write_to_file(stderr, saved_err_path)
  metadata['application']['err_hash'] = utils.get_hash(saved_err_path)
  metadata['application']['err_path'] = saved_err_path
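A minimal sketch of a call site, assuming the metadata dictionary already contains an 'application' sub-dictionary as used above; the command string, outputs, and return code are placeholders:

# Hypothetical values; stdout/stderr would normally come from the executed command.
metadata = {'application': {}}
process_app_data(
    metadata,
    cmd_string="make test",
    input_filepath=None,          # no explicit stdin file
    stdout="all tests passed\n",
    stderr="",
    return_code=0,
)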
Example #41
def full_page():
    title = 'Liste des étudiants'
    
    doc = FileTemplate('base.html')
    html = doc.render({
        'title' : title,
        'content' : generate_02(etudiants2)
    })
    write_to_file('output/liste_etudiants.html', html)
Example #42
def main(argv):
    del argv

    if FLAGS.rm:
        os.remove(FLAGS.out)
    else:
        if FLAGS.out in os.listdir('./'):
            logging.fatal(('%s is not empty. Make sure you have'
                           ' archived previously generated data. '
                           'Try --rm flag which will automatically'
                           ' delete previous data.') % FLAGS.out)

    # for reproducing purpose
    # np.random.seed(100)

    trials = FLAGS.trials
    freq = FLAGS.freq
    T = FLAGS.T
    inputnum = FLAGS.inputnum if FLAGS.minimax else 1

    # policies to be compared
    # add your methods here
    policies = [MultiUCB(FLAGS.alpha), LinUCB(FLAGS.alpha_LinUCB, FLAGS.T)]

    for policy in policies:
        logging.info('run policy %s' % policy.name)
        for trial in range(trials):
            if trial % 50 == 0:
                logging.info('trial: %d' % trial)

            minimax_regret = dict()

            for _ in range(inputnum):
                contexts = list(sphere_sampling(3, FLAGS.armnum))
                theta = [1, 0, 0]
                bandit = LinearBandit(contexts, theta)
                agg_regret = dict()
                # initialization
                bandit.init()
                policy.init(contexts)
                rewards = 0
                for t in range(0, T + 1):
                    if t > 0:
                        action = policy.choice(t)
                        reward = bandit.pull_arm(action)
                        policy.update(reward, action)
                        rewards += reward
                    if t % freq == 0:
                        agg_regret[t] = bandit.regret(rewards)
                for t in agg_regret:
                    minimax_regret[t] = max(minimax_regret.get(t, 0),
                                            agg_regret[t])
            # output one trial result into the output file
            write_to_file(dict({policy.name: minimax_regret}))

    # generate the final figure
    draw()
Example #43
def ssh_to_vm(**kwargs):
    host_name = kwargs['host']
    node_details = cloud_hero.get_all_details()
    if not node_details.get(host_name):
        sys.exit('No node with name {} found!'.format(host_name))

    node_index = 0
    nodes_data = node_details[host_name]
    if len(nodes_data) > 1:
        nodes_format = ('{index:<10}{node[name]:<25}{node[public_ip]:<20}'
                        '{node[private_ip]:<20}{node[packages]:<20}'
                        '{environment[name]:<20}{environment[id]:<20}')
        print('Node exists in two environments:')
        print(nodes_format.format(**PROMPTER_KWARGS))
        for index, node_data in enumerate(nodes_data):
            node_data['node']['packages'] = ','.join(node_data['node']['packages'])
            print(nodes_format.format(index=index, **node_data))
        user_prompt = 'Pick the node you want to ssh to'
        node_index = click.prompt(user_prompt, default=0)
    node = nodes_data[node_index]

    remote_ip = node['node']['public_ip']
    remote_user = node['provider']['username']

    # Get key and write it to the local path
    expanded_file_path = os.path.expanduser(CLOUD_HERO_SSH_KEY)
    if not os.path.exists(expanded_file_path):
        ssh_key_content = cloud_hero.list_key()['content']
        write_to_file(ssh_key_content, expanded_file_path)
        os.chmod(expanded_file_path, 0o600)

    # Connect to remote host.
    try:
        client = paramiko.SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(paramiko.WarningPolicy())
        print('*** Connecting to {} ...'.format(remote_ip))
        rsa_key = paramiko.RSAKey.from_private_key_file(expanded_file_path)
        client.connect(remote_ip, username=remote_user, pkey=rsa_key)

        chan = client.invoke_shell()
        print('*** Here we go!\n')
        interactive_shell(chan)
        chan.close()
        client.close()

    except Exception as e:
        print('*** Caught exception: %s: %s' % (e.__class__, e))
        traceback.print_exc()
        try:
            client.close()
        except:
            pass
        sys.exit(1)
Example #44
 def setUp(self):
     self.workspace = 'test_branch'
     Command.cmd_init(self.workspace)
     Command.cmd_commit('first ci')
     self.file_list = [('1.txt', '1\n'), ('2.txt', '2\n')]
     for path, content in self.file_list:
         write_to_file(path, content)
         Command.cmd_add(path)
     Command.cmd_commit('master ci')
     
     self.new_branch = 'new_branch'
     Command.cmd_branch(self.new_branch)
Example #45
 def commit(self, msg):
     new_tree = self.index.do_commit(self.workspace)
     
     committer_name = self.config.config_dict['user']['name']
     committer_email = '<%s>' %  (self.config.config_dict['user']['email'])
     commit_time = int(time.time())
     commit_timezone = time.strftime("%z", time.gmtime())
     
     commit = Commit(self.workspace, sha1=None, tree_sha1=new_tree.sha1, parent_sha1=self.head_tree, name=committer_name, email=committer_email, \
                     timestamp=commit_time, timezone=commit_timezone, msg=msg)
     write_object_to_file(commit.path, commit.content)
     write_to_file(self.head_path, commit.sha1)
Example #46
 def setUp(self):
     self.workspace = 'test_reset'
     Command.cmd_init(self.workspace)
     
     self.path, self.content = ('1.txt', '1\n')
     write_to_file(self.path, self.content)
     Command.cmd_add(self.path)
     Command.cmd_commit('first ci')
     self.first_commit = Branch().head_commit
     
     write_to_file(self.path, '2.txt')
     Command.cmd_add(self.path)
     Command.cmd_commit('second ci')
Example #47
 def _build_tree(path):
     dir_arr = []
     file_arr = []
     for name, entry in path.items():
         if isinstance(entry, dict):
             mode = stat.S_IFDIR
             sha1 = _build_tree(entry).sha1
             dir_arr.append({'name':name, 'mode':mode, 'sha1':sha1})
         else:
             (mode, sha1) = entry
             file_arr.append({'name':name, 'mode':mode, 'sha1':sha1})
     newtree = Tree(sorted(dir_arr, key=lambda x: x['name']) + sorted(file_arr, key=lambda x: x['name']))
     write_to_file(newtree.path, newtree.content)
     return newtree
Example #48
 def test_status_unstaged_files(self):
     file_list = [('1.txt', '1\n'), ('2.txt', '2\n')]
     for path, content in file_list:
         write_to_file(path, content)
         Command.cmd_add(path)
     
     write_to_file(file_list[0][0], '11\n')
     os.remove(file_list[1][0])
     
     repo = Repository()
     unstaged_files = repo.get_unstaged_files()
     
     self.assertEqual(unstaged_files['modified'], [file_list[0][0]])
     self.assertEqual(unstaged_files['deleted'], [file_list[1][0]])
     Command.cmd_status()
Example #49
 def setUp(self):
     self.workspace = 'test_log'
     Command.cmd_init(self.workspace)
     
     self.path = '1.txt'
     self.content = '1\n'
     write_to_file(self.path, self.content)
     
     Command.cmd_add(self.path)
     Command.cmd_commit('first ci')
     
     second_content = '11\n'
     write_to_file(self.path, second_content)
     
     Command.cmd_add(self.path)
     Command.cmd_commit('second ci')
Example #50
 def test_checkout(self):
     Command.cmd_checkout(self.new_branch)
     self.assertEqual(Branch().head_name, self.new_branch)
     
     write_to_file(self.file_list[0][0], '11\n')
     Command.cmd_rm(self.file_list[1][0])
     new_path = '3.txt'
     new_content = '3\n'
     write_to_file(new_path, new_content)
     Command.cmd_add('.')
     Command.cmd_commit('branch ci')
     
     Command.cmd_checkout('master')
     self.assertTrue(os.path.exists(self.file_list[1][0]))
     self.assertFalse(os.path.exists(new_path))
     self.assertEqual(read_file(self.file_list[0][0]), self.file_list[0][1])
Example #51
def extract_assets(game_object, out_dir):
	from unitypack.export import OBJMesh

	if game_object.mesh:
		write_to_file(
			os.path.join(out_dir, game_object.mesh.name + ".obj"),
			OBJMesh(game_object.mesh.object).export()
		)
	for material in game_object.materials:
		if material.shader:
			extract_shader(material.shader, out_dir)
		for texture in material.textures.values():
			extract_texture(texture.object, out_dir)

	for child in game_object.children:
		extract_assets(child, out_dir)
Example #52
def main():
	arg_parser = argparse.ArgumentParser()
	arg_parser.add_argument("files", nargs="+", help="the unity3d files")
	arg_parser.add_argument("id", help="the id of the base asset")
	arg_parser.add_argument("output", help="the output directory")
	arg_parser.add_argument("-q", action="store_true")
	arg_parser.add_argument("-qq", action="store_true")
	args = arg_parser.parse_args(sys.argv[1:])

	Echo.quiet = args.q
	Echo.very_quiet = args.qq

	base_id = int(args.id)

	redefine_shader()
	env = UnityEnvironment()

	for file in args.files:
		info(f"Reading {file}")
		f = open(file, "rb")
		env.load(f)

	for bundle in env.bundles.values():
		for asset in bundle.assets:
			info(f"Parsing {asset.name}")
			game_object = get_by_id(base_id, asset)
			if not game_object:
				info(f"{base_id} not found in {asset.name}")
				break

			root_object = get_root_object(game_object)
			root_transform = get_transform(root_object)

			tree = Tree()
			traverse_transforms(root_transform, tree)

			# create output directory
			out_dir = os.path.join(args.output, tree.root.name)
			if not os.path.exists(out_dir):
				os.mkdir(out_dir)
			# export the tree as json
			json_str = json.dumps(tree.root, cls=GameObjectEncoder, indent=4)
			write_to_file(os.path.join(out_dir, "data.json"), json_str)
			# extract referenced textures, models and shaders
			extract_assets(tree.root, out_dir)
Example #53
 def commit(self, msg, ref='HEAD'):
     cur_tree = self.index.do_commit(self.workspace)
     branch_name = read_file(os.path.join(self.workspace, '.git', 'HEAD')).strip('\n').rsplit('/', 1)[-1]
     ref_path = os.path.join(self.workspace, '.git', 'refs', 'heads', branch_name)
     parent_sha1 = None
     if os.path.exists(ref_path):
         parent_sha1 = read_file(ref_path) 
     committer_name = self.config.config_dict['user']['name']
     committer_email = '<%s>' %  (self.config.config_dict['user']['email'])
     commit_time = int(time.time())
     
     #TO FIX
     commit_timezone = time.strftime("%z", time.gmtime())
     
     commit = Commit(self.workspace, tree_sha1=cur_tree.sha1, parent_sha1=parent_sha1, name=committer_name, email=committer_email, \
                     timestamp=commit_time, timezone=commit_timezone, msg=msg)
     write_object_to_file(commit.path, commit.content)
     write_to_file(ref_path, commit.sha1)
Example #54
 def test_commit_twice(self):
     Command.cmd_commit('first ci')
     parent_sha1 = Branch().head_commit
     
     second_content = '11\n'
     write_to_file(self.path, second_content)
     
     new_path = '2.txt'
     new_content = '2\n'
     write_to_file(new_path, new_content)
     
     Command.cmd_add('.')
     Command.cmd_commit('second ci')
     
     commit = Commit(sha1=Branch().head_commit)
     self.assertEqual(parent_sha1, commit.parent_sha1)
     tree = Tree(sha1=commit.tree)
     objects = tree.parse_objects()
     self.assertEqual(objects[self.path]['sha1'], Blob(second_content).sha1)
     self.assertEqual(objects[new_path]['sha1'], Blob(new_content).sha1)
Example #55
def FormatChangeSet(commit_info):
    '''
       1. Parse the change set
       2. Store the files to hard disk
       3. Replace the file stream to file name in change list
    '''
    uuid = commit_info["uuid"]
    transaction = commit_info["transaction"]
    changes = commit_info["changes"]
    for change in changes:
        if "filecontent" in change:
            index = changes.index(change)
            file = change["filename"].replace("/", "_")
            file = file.replace(" ", "_")
            filename = "_".join([uuid, transaction, file])
            path = os.path.join(config.server["static"], "files", filename)
            log.debug("Write file to path: %s" % path)
            utils.write_to_file(path, change["filecontent"])
            changes[index]["fileurl"] = "/".join(["static","files", filename]) 
            if change["diffcontent"]:
                diff_path = "%s.diff" % path
                changes[index]["diffpath"] = diff_path
                utils.write_to_file(diff_path, change["diffcontent"])
    return commit_info
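A hedged sketch of the commit_info structure this expects, inferred from the keys accessed above; the values are placeholders and the module-level config, log, and utils objects are assumed to be available:

# Hypothetical change set; fields mirror the keys FormatChangeSet reads.
commit_info = {
    "uuid": "ab12cd34",
    "transaction": "7",
    "changes": [
        {
            "filename": "src/app.py",
            "filecontent": "print('hello')\n",
            "diffcontent": "+print('hello')\n",
        },
    ],
}
FormatChangeSet(commit_info)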
Example #56
def extract_texture(texture, out_dir, flip=True):
	filename = texture.name + ".png"
	try:
		image = texture.image
	except NotImplementedError:
		error(f"WARNING: Texture format not implemented. Skipping {filename}.")
		return

	if image is None:
		error("WARNING: {filename} is an empty image")
		return

	info("Decoding {texture.name}")
	# Texture2D objects are flipped
	if flip:
		img = ImageOps.flip(image)
	else:
		img = image
	# PIL has no method to write to a string :/
	output = BytesIO()
	img.save(output, format="png")
	write_to_file(
		os.path.join(out_dir, filename),
		output.getvalue(),
		mode="wb"
	)
Example #57
 def test_status_uncommitted_files(self):
     file_list = [('1.txt', '1\n'), ('2.txt', '2\n')]
     for path, content in file_list:
         write_to_file(path, content)
         Command.cmd_add(path)
     Command.cmd_commit('first ci')
     
     write_to_file(file_list[0][0], '11\n')
     Command.cmd_rm(file_list[1][0])
     new_path = '3.txt'
     new_content = '3\n'
     write_to_file(new_path, new_content)
     Command.cmd_add('.')
     
     repo = Repository()
     uncommitted_files = repo.get_uncommitted_files()
     self.assertEqual(uncommitted_files['modified'], [file_list[0][0]])
     self.assertEqual(uncommitted_files['deleted'], [file_list[1][0]])
     self.assertEqual(uncommitted_files['new file'], [new_path])
     Command.cmd_status()
Example #58
def handle_asset(asset, handle_formats, dir, flip, objMesh):
	for id, obj in asset.objects.items():
		try:
			otype = obj.type
		except Exception as e:
			error("[Error] %s" % (e))
			continue

		if otype not in handle_formats:
			continue

		d = obj.read()
		save_path = os.path.join(dir, obj.type, d.name)
		utils.make_dirs(save_path)

		if otype == "Mesh":
			try:
				mesh_data = None

				if not objMesh:
					mesh_data = BabylonMesh(d).export()
					utils.write_to_file(save_path + ".babylon", mesh_data, mode="w")

				mesh_data = OBJMesh(d).export()
				utils.write_to_file(save_path + ".obj", mesh_data, mode="w")
			except (NotImplementedError, RuntimeError) as e:
				error("WARNING: Could not extract %r (%s)" % (d, e))
				mesh_data = pickle.dumps(d._obj)
				utils.write_to_file(save_path + ".Mesh.pickle", mesh_data, mode="wb")

		elif otype == "TextAsset":
			if isinstance(d.script, bytes):
				utils.write_to_file(save_path + ".bin", d.script, mode="wb")
			else:
				utils.write_to_file(save_path + ".txt", d.script)

		elif otype == "Texture2D":
			filename = d.name + ".png"
			try:
				image = d.image
				if image is None:
					info("WARNING: %s is an empty image" % (filename))
					utils.write_to_file(save_path + ".empty", "")
				else:
					info("Decoding %r" % (d))
					img = image
					if flip:
						img = ImageOps.flip(image)
					img.save(save_path + ".png")
			except Exception as e:
				error("Failed to extract texture %s (%s)" % (d.name, e))