Example #1
def main(training_file, test_file, submission_file, ratio):
    data = utilities.read_file(training_file)
    test_data = utilities.read_file(test_file)

    print 'Preparing data...'
    x, y = preprocess.prepare_data(data)
    refid, x_test = preprocess.prepare_test_data(test_data)
    x, x_test = preprocess.preprocess_features(x, x_test)

    print 'Feature extracting...'
    x, x_test = feature_extraction.create_feature(x, y, x_test)

    indices = feature_extraction.get_best_k_feature_indices(x, y, 300)
    x = feature_extraction.get_best_k_features(x, indices)
    x_test = feature_extraction.get_best_k_features(x_test, indices)
    print 'Get %s features.' % len(x[0])

    x_train, x_cv, y_train, y_cv = cross_validation.train_test_split(
        x, y, test_size=.3, random_state=0)
    x_train, y_train = preprocess.down_sample(x_train, y_train, ratio)

    clf = classification.random_forest(x_train, y_train, x_cv, y_cv)

    print 'Predicting...'
    predict = clf.predict_proba(x_test)
    utilities.write_submission_file(submission_file, refid, predict)
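A hypothetical driver for the pipeline above; the file paths and the down-sampling ratio are illustrative assumptions, not part of the example:

if __name__ == '__main__':
    # Train on data/train.csv, predict data/test.csv, write submission.csv,
    # passing a down-sampling ratio of 0.1 (all values assumed).
    main('data/train.csv', 'data/test.csv', 'submission.csv', 0.1)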
Example #3
def correct_ADV():
    contents = read_file('data/new-ADV.txt', strip=True, dict_format=True)
    entries = read_file('data/cebposdict-nc.txt', strip=True, dict_format=True)

    for key, value in contents.iteritems():
        if 'REM' in value:
            if key in entries:
                del entries[key]
        else:
            entries[key] = value

    if len(contents):
        write_file('data/cebposdict-nc.txt',
                   contents=[''],
                   add_newline=False,
                   mode='w')
        for key, value in sorted(entries.iteritems()):
            new_entry = [key + ' ']
            value = list(set(value))
            new_entry.append(' '.join(value))
            new_entry.append('\n')
            write_file('data/cebposdict-nc.txt',
                       contents=new_entry,
                       add_newline=False,
                       mode='a')
            new_entry = []
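The read_file helper itself is not part of these snippets; a minimal sketch of a reader compatible with the strip=True, dict_format=True calls above, assuming each dictionary line holds a word followed by its tags:

def read_file(name, strip=False, dict_format=False):
    # Hypothetical reader matching the calls above (not the project's own code):
    # with dict_format=True each line "word TAG1 TAG2 ..." becomes
    # {"word": ["TAG1", "TAG2", ...]}; otherwise the lines are returned as a list.
    with open(name) as handle:
        lines = handle.readlines()
    if strip:
        lines = [line.strip() for line in lines]
    if not dict_format:
        return lines
    entries = {}
    for line in lines:
        parts = line.split()
        if parts:
            entries[parts[0]] = parts[1:]
    return entries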
Example #4
def scrape_news_contents():
    checkpoint = read_file("data/scraped/cp/news-links-cp.txt")
    start = int(checkpoint[0])
    if start == 501:
        print("Status: Finished!")
        return

    urls = read_file("data/scraped/news-links.txt", start=start)
    contents = []
    for idx, url in enumerate(urls):
        start += 1
        print("Link [" + str(start) + "]: " + url)
        page = urlopen(url)
        soup = BeautifulSoup(page, 'html.parser')
        div = soup.find('div', {
            'class': 'field-item even',
            'property': 'content:encoded'
        })
        for child in div.findChildren():
            contents.append(child.getText())
        write_file("data/scraped/news-raw-nc.txt",
                   contents=contents,
                   per_line=True,
                   mode="a")
        contents = []
        endpoints = [str(start + 1)]

        write_file("data/scraped/cp/news-links-cp.txt",
                   contents=endpoints,
                   mode="w")
Example #5
def extract_features_multiprocess(ast_directory, features_directory, feature_type_to_extract, js_keywords_file, cpu_to_relax):
    all_files = utilities.get_files_in_a_directory(ast_directory)
    temp = utilities.read_file(js_keywords_file)

    keywords_list = []
    for item in temp:
        keywords_list.append(item.strip())

    raw_extracted_files = utilities.get_files_in_a_directory(features_directory)
    completed_feature_files = []
    
    for unpacked in raw_extracted_files:
        completed_feature_files.append(unpacked.split('/')[-1].split('.')[0])
    
    raw_extracted_files = []
    remaining_unpacked_files = []

    if os.path.isfile('ast_parsing.log'):
        raw_processed = utilities.read_file('ast_parsing.log')
    else:
        raw_processed = []

    processed_log = set()
    for item in raw_processed:
        processed_log.add(item.split(' ')[0].split('.')[0])

    raw_processed = []

    for unpacked in all_files:
        if unpacked.split('/')[-1].split('.')[0] in completed_feature_files: # or unpacked.split('/')[-1].split('.')[0] in processed_log:
            continue
        else:
            remaining_unpacked_files.append(unpacked)
    
    completed_feature_files = []
    all_files = []
    processed_log = set()

    print(len(remaining_unpacked_files), 'files to process')
    pool = ThreadPool(processes=multiprocessing.cpu_count() - cpu_to_relax)

    try:
        # all_files was emptied above to free memory, so every branch walks
        # the remaining (not yet processed) files.
        if feature_type_to_extract == ALL:
            results = pool.starmap(new_walk, zip(remaining_unpacked_files, itertools.repeat(features_directory)))
        elif feature_type_to_extract == NO_NAMES:
            results = pool.starmap(new_walk_no_names, zip(remaining_unpacked_files, itertools.repeat(features_directory)))
        elif feature_type_to_extract == KEYWORD:
            results = pool.starmap(new_walk_reserved_words, zip(remaining_unpacked_files, itertools.repeat(features_directory), itertools.repeat(keywords_list)))
        
        # for f_name in all_files:
        #     new_walk_reserved_words(f_name, features_directory, keywords_list)
        # utilities.append_list(f_name.replace(directory_path,result_directory).replace('json', 'txt'), raw_features)
    except Exception as e:
        print ('Exception in main thread: ', str(e))

    pool.close()
    pool.join()

    return
Example #7
    def _get_previous_version(self):
        """
        Get current installed mysql version from cache file
        """
        if os.path.exists(self.CACHE_VERSION_FILE):
            return read_file(self.CACHE_VERSION_FILE)

        return None
Example #8
    def post(self):
        start_time = time.time()
        args = self.parser.parse_args()

        # read data
        params = read_params(args['params'].stream)
        df = read_file(args['raw_data'].stream.read())
        y_train = read_file(args['labels'].stream.read())

        # build features
        X_train = build_features(df, params)
        y_train = y_train.set_index('example_id')
        y_train = y_train.loc[X_train.index]

        # train model
        cl = train_model(X_train, y_train.label, params)
        self.model_factory.add_pipeline(cl, params)
        if isinstance(cl, tpot.TPOTClassifier):
            final_classifier = cl.fitted_pipeline_
            evaluated_indivs = cl.evaluated_individuals_
        else:
            final_classifier = cl
            evaluated_indivs = None
        model_type = str(final_classifier)
        mean_accuracy, mean_roc_auc = cross_validate(final_classifier, X_train,
                                                     y_train.label)

        # format feat_eng_params
        feat_eng_params = params['extract_features'].copy()
        for k in feat_eng_params.keys():
            if k == 'default_fc_parameters':  # shows calculations like min, mean, etc.
                feat_eng_params[k] = str(feat_eng_params[k].keys())
            elif k == 'impute_function':
                feat_eng_params[k] = str(feat_eng_params[k].__name__)
            else:
                feat_eng_params[k] = str(feat_eng_params[k])


#        for k in feat_eng_params:
#            feat_eng_params[k] = str(feat_eng_params[k])
        result = {
            'trainTime': time.time() - start_time,
            'trainShape': X_train.shape,
            'modelType': model_type,
            'featureEngParams': feat_eng_params,
            'modelId': params['pipeline_id'],
            'mean_cv_accuracy': mean_accuracy,
            'mean_cv_roc_auc': mean_roc_auc,
            'evaluated_models': evaluated_indivs
        }
        self.model_factory[params['pipeline_id']]['stats'] = result
        return json.dumps(result)
Example #9
def get_data(data_file, test_file):
    """ Produces training set, cross validation set and test set. """

    raw_data = utilities.read_file(data_file, True)
    test_data = utilities.read_file(test_file, True)
    x = array(raw_data, float64)
    y = x[:, 0]
    x = x[:, 1 : :]
    x_train, x_cv, y_train, y_cv = cross_validation.train_test_split(
        x, y, test_size=0.3, random_state=None)
    x = array(test_data, float64)
    y_test = x[:, 0]
    x_test = x[:, 1 : :]

    return (x_train, y_train, x_cv, y_cv, x_test, y_test)
Example #10
    def _before_install_new_packages(self):
        """
        Specific actions before new packages installation
        """
        print "The installation of MySQL for db_governor has started"

        check_file("/usr/local/directadmin/custombuild/build")
        check_file("/usr/local/directadmin/custombuild/options.conf")

        # MYSQL_DA_TYPE=`cat /usr/local/directadmin/custombuild/options.conf | grep mysql_inst= | cut -d= -f2`
        try:
            MYSQL_DA_TYPE = grep("/usr/local/directadmin/custombuild/options.conf", "mysql_inst=")[0].split("=")[1]
        except IndexError:
            MYSQL_DA_TYPE = ""

        if os.path.exists("/usr/share/lve/dbgovernor/da.tp.old"):
            if MYSQL_DA_TYPE == "no":
                MYSQL_DA_TYPE = read_file("/usr/share/lve/dbgovernor/da.tp.old")
            else:
                write_file("/usr/share/lve/dbgovernor/da.tp.old", MYSQL_DA_TYPE)
        else:
            write_file("/usr/share/lve/dbgovernor/da.tp.old", MYSQL_DA_TYPE)

        exec_command_out("/usr/local/directadmin/custombuild/build set mysql_inst no")

        self._mysqlservice("stop")
Example #11
def remove_function_words():
    entries = read_file('data/cebposdict-nc.txt', strip=True, dict_format=True)

    function_tags = ['DET', 'PART', 'CONJ', 'PRON']
    function_words = {'DET': [], 'PART': [], 'CONJ': [], 'PRON': []}

    for tag in function_tags:
        for key, value in entries.iteritems():
            if tag in value:
                value.append('REM')
                function_words[tag].append(key)
    write_file('data/cebposdict-nc.txt',
               contents=[''],
               add_newline=False,
               mode='w')
    for key, value in sorted(entries.iteritems()):
        if 'REM' not in value:
            new_entry = [key + ' ']
            value = list(set(value))
            new_entry.append(' '.join(value))
            new_entry.append('\n')
            write_file('data/cebposdict-nc.txt',
                       contents=new_entry,
                       add_newline=False,
                       mode='a')
            new_entry = []

    for key, value in function_words.iteritems():
        write_file('data/' + key + '.txt',
                   contents=value,
                   add_newline=False,
                   append_newline=True,
                   mode='w')
Example #12
def generate_training_set(follow, followed, ratio, solution_file, data_file):
    """ Uses the solution file to generate training set to train
    the model, hoping this method can get better result.
    Ratio controls the fraction of pos and neg data sets, if ratio is -1,
    the fraction is the origion fraction."""

    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for i in range(len(raw_solution)):
        row = raw_solution[i]
        dict_solution[int(row[0])] = set(int(n) for n in row[1 : :])

    x_train = [['spring brother is a true man']]
    for node in dict_solution.keys():
        nodes_pos = dict_solution[node]
        for n in nodes_pos:
            features = rank.get_features(follow, followed, node, n)
            x_train.append([1] + features)

        nodes_neg = candidate.get_candidates(follow, followed, node)
        nodes_neg.difference_update(nodes_pos)
        nodes_neg = list(nodes_neg)
        perm = random.permutation(len(nodes_neg))
        if ratio != -1:
            num = min(int(len(nodes_pos) * ratio), len(nodes_neg))
        else:
            num = len(nodes_neg)
        for i in range(num):
            # Pair the source node with the sampled negative candidate.
            neg = nodes_neg[perm[i]]
            features = rank.get_features(follow, followed, node, neg)
            x_train.append([0] + features)

    utilities.write_file(data_file, x_train)
Example #13
def to_panda_data():
    output = read_file('data/output_tokens.txt', strip=True)
    panda_data = []
    indexes = []
    for o in output:
        data = {}
        o = o.split(' ')
        indexes.append(o[0])
        data['root'] = o[1]

        data['is_root'] = o[0] == o[1]
        if o[2] != 'None':
            data['prefix'] = o[2]

        if o[3] != 'None':
            data['infix'] = o[3]

        if o[4] != 'None':
            data['suffix'] = o[4]

        data['is_entry'] = True if int(o[5]) == 1 else False

        if len(o) == 7:
            data['is_valid'] = True if int(o[6]) == 1 else False
        else:
            data['is_valid'] = 'Null'
        panda_data.append(data)

    return {'data': panda_data, 'index': indexes}
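The returned dictionary is shaped for pandas; a short usage sketch (the pandas import and the variable names are assumptions):

import pandas as pd

# Rows come from the per-token dictionaries, indexed by the first token of
# each output line.
result = to_panda_data()
df = pd.DataFrame(result['data'], index=result['index'])
print(df.head())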
Example #14
def write_non_dominated_frontiers(time, files_names, possible_values,
                                  dir_path):
    char_n_file_gen = Wise_permutations(files_names)
    for file_tp, n in char_n_file_gen:
        # Create folder and move there if not exists
        os.chdir(dir_path)
        create_folder(f'{n}{file_tp}')

        print(f'\n\n -------- {n}{file_tp} --------- \n\n')
        os.chdir(f'{dir_path}{n}{file_tp}/')

        fp = f'~/Dropbox/PI/PI2/data/n{n}q10{file_tp}.dat'
        file_stations = util.read_file(fp)
        stations = read_stations(file_stations)  # list of stations
        Sol.set_stations(stations)

        # For every file let us calculate every frontier
        tuples_combinations = Wise_permutations(possible_values)
        for params in tuples_combinations:
            print('params --> ', params)
            # Train model
            n_pob, ratio_sons, ratio_mutation, num_random_sols = params
            solutions = SolCollection(n_pob=n_pob,
                                      ratio_sons=ratio_sons,
                                      ratio_mutation=ratio_mutation,
                                      num_random_sols=num_random_sols)
            non_dom_result = solutions.train_time(time)
            # Save file
            out_file_name = tuples_combinations.string_params(params) + '.txt'
            np.savetxt(out_file_name, non_dom_result)
Example #15
 def get_mysql_user(self):
     """
     Retrieve MySQL user name and password and save it into self attributes
     """
     if os.path.exists(self.DBPASS_PATH):
         self.MYSQLUSER = "******"
         self.MYSQLPASSWORD = read_file(self.DBPASS_PATH)
Example #16
 def read(self, filePath):
     """Return metadatas and content of a markdown file"""
     
     mdContent = read_file(filePath)
     md = markdown.Markdown(extensions=['meta', 'codehilite'])
     htmlContent = md.convert(mdContent)
     return htmlContent, md.Meta
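For reference, a small sketch of the front-matter format the 'meta' extension parses; the sample document is illustrative only:

import markdown

sample = '''Title: A sample post
Author: Someone

# Heading

Body text.
'''

md = markdown.Markdown(extensions=['meta', 'codehilite'])
html = md.convert(sample)
print(md.Meta)   # {'title': ['A sample post'], 'author': ['Someone']}
print(html)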
Example #17
def resolve_equals():
    write_file('data/cebposdict-4.txt',
               contents=[''],
               no_encode=True,
               add_newline=False,
               mode='w')
    entries = read_file('data/cebposdict-3.txt', dict_format=True)
    result = []
    for key, value in entries.iteritems():
        words = nltk.word_tokenize(" ".join(value))
        new_entry = [key + ' ']
        related_words = []
        for word in words:
            if word in ['PART', 'ADJ', 'PRON', 'VERB', 'NOUN', 'NUM']:
                new_entry.append(word + ' ')
            elif word != '=':
                related_words.append(word)

        for rel_word in related_words:
            if rel_word in entries:
                values = entries[rel_word]
                words = nltk.word_tokenize(" ".join(values))
                # words = list(Text(" ".join(values)).words)
                for word in words:
                    if word in ['PART', 'ADJ', 'PRON', 'VERB', 'NOUN', 'NUM']:
                        new_entry.append(word + ' ')

        new_entry.append('\n')
        write_file('data/cebposdict-4.txt',
                   contents=new_entry,
                   add_newline=False,
                   mode='a')
        new_entry = []

    print('resolve_equals: Finished!')
Example #18
def scrape_news_links():
    links = read_file('data/scraped/news-links.txt')
    if len(links) == 500:
        print("Status: Finished!\n")
        return

    url = "http://www.sunstar.com.ph/superbalita-cebu/balita"
    main_url = urlparse.urlparse(url).scheme + '://' + urlparse.urlparse(
        url).hostname
    stop_scraping_process = False
    i = 0
    limit = 500
    while i < limit and not stop_scraping_process:
        page = urlopen(url)
        soup = BeautifulSoup(page, 'html.parser')
        titles = soup.findAll('h3', {'class': 'title'})
        for title in titles:
            child = title.findChildren()[0]
            write_file("data/scraped/news-links.txt",
                       contents=[main_url + child.get('href')],
                       mode="a")
            print(main_url + child.get('href'))
            print("\n")
            i += 1
            if i == limit:
                break

        next_page = soup.find('a', {'title': 'Go to next page'})
        if next_page:
            url = main_url + next_page.get('href')
        else:
            stop_scraping_process = True
Example #20
def contextual_rules():
    raw = read_file('data/rules/CONTEXTUAL.txt', strip=True)
    rules = []

    for r in raw:
        rule = ContextualRule()

        string = r.split(' ')

        rule.operator = string[0]
        rule.target = string[1]

        i = 2
        while i < len(string):
            condition = ContextCondition()

            position = string[i]

            if 'C' in position:
                condition.careful_mode = True
                position = position.replace('C', '')

            condition.position = int(position)
            condition.pos_tag = string[i + 1]

            rule.context_conditions.append(condition)

            i += 2

        rules.append(rule)

    return rules
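The rule containers are not shown in the snippet; a minimal sketch of classes compatible with the attributes the parser sets (the class names mirror the code above, the defaults are assumptions):

class ContextCondition(object):
    # One positional condition of a contextual rule.
    def __init__(self):
        self.position = 0          # relative position of the context word
        self.pos_tag = None        # POS tag expected at that position
        self.careful_mode = False  # set when the position carries a 'C' flag


class ContextualRule(object):
    # A rule of the form: OPERATOR TARGET (POSITION TAG)*
    def __init__(self):
        self.operator = None            # first token of the rule line
        self.target = None              # second token: the tag acted upon
        self.context_conditions = []    # ContextCondition objects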
Example #22
    def _get_new_version(self):
        """
        Get new sql version for install
        """
        if os.path.exists(self.NEW_VERSION_FILE):
            return read_file(self.NEW_VERSION_FILE)

        return "auto"
Example #23
 def upload_release_to_github(self):
     # Draft the upload to github
     release_notes = read_file(self.details.new_release_notes_path)
     pyperclip.copy(release_notes)
     print('The release notes are on the clipboard')
     github_url = F"'https://github.com/approvals/ApprovalTests.cpp/releases/new?tag={self.details.new_version}&title=Single%20Hpp%20File%20-%20{self.details.new_version}'"
     run(["open", github_url])
     run(["open", self.details.release_dir])
     check_step("that the release is published")
Example #24
def get_entries():
    entries = read_file(name='data/cebposdict.txt',
                        strip=True,
                        dict_format=True)

    func_words = ['CONJ', 'DET', 'PART', 'PRON']

    for func in func_words:
        words = read_file(name='data/function_words/' + func + '.txt',
                          strip=True)

        for word in words:
            if word in entries:
                entries[word].append(func)
            else:
                entries[word] = [func]

    return entries
Example #26
def mean_average_precision(result_file, solution_file):
    """ Calculates the mean average precision. """

    raw_result = utilities.read_file(result_file, True)
    raw_solution = utilities.read_file(solution_file, False)
    dict_result = {}
    for row in raw_result:
        dict_result[row[0]] = row[1 : :]
    dict_solution = {}
    for row in raw_solution:
        dict_solution[row[0]] = set(row[1 : :])

    res = 0.0
    for key in dict_result.keys():
        prediction = dict_result[key][0].split()
        ground_truth = dict_solution[key]
        res += ap(ground_truth, prediction)
    res /= len(dict_result)
    print 'mean average precision = %f' % res
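The ap helper is not included in the example; a minimal sketch of average precision over a ranked prediction list, using the standard definition rather than the project's own implementation:

def ap(ground_truth, prediction):
    # Accumulate precision at every rank where a new relevant item appears,
    # then normalise by the number of relevant items.
    if not ground_truth:
        return 0.0
    hits = 0
    score = 0.0
    for i, p in enumerate(prediction):
        if p in ground_truth and p not in prediction[:i]:
            hits += 1
            score += hits / float(i + 1)
    return score / len(ground_truth)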
Example #27
def baseline(training_file, submission_file, output_file):
    data = utilities.read_file(training_file)
    sub_data = utilities.read_file(submission_file, True)

    print 'Calculating hour averages...'
    hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(data)
    hour_avg = utilities.get_hour_avg(data)

    print 'Filling submission file...'
    for i in range(1, len(sub_data)):
        chunk_id = sub_data[i][1]
        hour = sub_data[i][3]
        for j in range(5, len(sub_data[i])):
            if sub_data[i][j] == '0':
                if chunk_id in hour_avg_by_chunk:
                    sub_data[i][j] = hour_avg_by_chunk[chunk_id][hour][j - 5]
                else:
                    sub_data[i][j] = hour_avg[hour][j - 5]

    utilities.write_file(output_file, sub_data)
Example #28
 def show_user_manual(self, widget, help_window, parent_window):
     """ Shows the configuration window.
     parameters:
         [AppGTK] self -- the self instance.
         [gtk.Widget] widget -- the widget event.
         [gtk.Object] config_window -- The configuration window object.
         [gtk.Object] config_window -- The start window object(parent).
     """
     if help_window and parent_window:
         self.help_label.set_text(utilities.read_file("user_manual.txt"))
         help_window.show()
Example #30
def read_shopping_articles_from_file_to_json():
    path = os.getcwd()
    path = os.path.join(path, "data/")
    file = "shopping_articles.txt"
    raw_file = utils.read_file(base_path=path, filename=file, file_type="json")
    if type(raw_file) == dict:
        articles_json = raw_file
    else:
        articles_json = None
        logging.error(f"Read in file is type {type(raw_file)}!")
    return articles_json
Example #32
    def update_conan_config_yml(self, conan_approvaltests_dir,
                                new_version_without_v):
        conan_data_file = os.path.join(conan_approvaltests_dir, 'config.yml')
        conandata_yml_text = read_file(conan_data_file)

        conan_data = \
F'''  {new_version_without_v}:
    folder: all
'''
        conandata_yml_text += conan_data

        write_file(conan_data_file, conandata_yml_text)
Example #33
    def _detect_version_if_auto(self):
        """
        Detect version of MySQL if mysql.type is auto
        """
        print "Detect MySQL version for AUTO"

        check_file("/usr/local/directadmin/custombuild/build")
        check_file("/usr/local/directadmin/custombuild/options.conf")
        MYSQL_DA_VER = ""

        # MYSQL_DA_TYPE=`cat /usr/local/directadmin/custombuild/options.conf | grep mysql_inst= | cut -d= -f2`
        try:
            MYSQL_DA_VER = grep("/usr/local/directadmin/custombuild/options.conf", "mysql=")[0].split("=")[1].strip()
            MYSQL_DA_TYPE = grep("/usr/local/directadmin/custombuild/options.conf", "mysql_inst=")[0].split("=")[1].strip()
        except IndexError:
            MYSQL_DA_VER = ""
            MYSQL_DA_TYPE = ""
        if MYSQL_DA_TYPE == "no":
            if os.path.exists("/usr/share/lve/dbgovernor/da.tp.old"):
                MYSQL_DA_TYPE = read_file("/usr/share/lve/dbgovernor/da.tp.old")
            elif os.path.exists("/usr/bin/mysql"):
                result = exec_command("/usr/bin/mysql -V | grep -c 'MariaDB' -i || true", True)
                if result == "0":
                    MYSQL_DA_TYPE = "mysql"
                else:
                    MYSQL_DA_TYPE = "mariadb"

        print "I got %s and %s" % (MYSQL_DA_VER, MYSQL_DA_TYPE)

        mysql_version_map = {
            "5.0": "mysql50",
            "5.1": "mysql51",
            "5.5": "mysql55",
            "5.6": "mysql56",
            "5.7": "mysql57",
            "10.0.0": "mariadb100",
            "10.1.1": "mariadb101"
        }
        mariadb_version_map = {
            "10.1": "mariadb101",
            "10.0": "mariadb100",
            "5.6": "mariadb100",
            "5.5": "mariadb100",
            "10.0.0": "mariadb100",
            "10.1.1": "mariadb100"
        }

        if MYSQL_DA_TYPE == "mysql":
            MYSQL_DA_VER = mysql_version_map[MYSQL_DA_VER]
        elif MYSQL_DA_TYPE == "mariadb":
            MYSQL_DA_VER = mariadb_version_map[MYSQL_DA_VER]

        return MYSQL_DA_VER
Example #34
def correct_NUM():
    contents = read_file('data/old-NUM.txt', strip=True, dict_format=True)
    entries = read_file('data/cebposdict-nc.txt', strip=True, dict_format=True)

    for content in contents:
        entries[content] = ['NUM']

    write_file('data/cebposdict-nc.txt',
               contents=[''],
               add_newline=False,
               mode='w')
    for key, value in sorted(entries.iteritems()):
        new_entry = [key + ' ']
        value = list(set(value))
        new_entry.append(' '.join(value))
        new_entry.append('\n')
        write_file('data/cebposdict-nc.txt',
                   contents=new_entry,
                   add_newline=False,
                   mode='a')
        new_entry = []
Example #35
def main():
    hamlet = read_file(filepath)

    hamlet_cleaned = clean_text(hamlet)

    #hamlet_wordcount = wordcount(hamlet_cleaned)
    #hamlet_wordcount = wordcount_counter(hamlet_cleaned)
    #hamlet_wordcount = wordcount_dd(hamlet_cleaned)
    hamlet_wordcount = word_count_err_handling(hamlet_cleaned)

    #print(hamlet_wordcount.most_common(50))
    print(hamlet_wordcount)
Example #36
def avg(training_file, submission_file, output_file):
    data = utilities.read_file(training_file)

    train_data, cv_data = preprocess.get_train_cv_data_by_chunk(data)
    targets_train, targets_cv = preprocess.get_train_cv_targets(
        train_data, cv_data)

    (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk, hour_avg,
     weekday_avg) = feature_extraction.get_avg_maps(train_data)

    x_train_all, x_cv_all = feature_extraction.get_x_by_avg(
        train_data, cv_data, chunk_avg, hour_avg_by_chunk,
        weekday_avg_by_chunk, hour_avg, weekday_avg)

    clfs = regression.linear_regression(x_train_all, x_cv_all, targets_train,
                                        targets_cv)
    clfs = regression.random_forest(x_train_all, x_cv_all, targets_train,
                                    targets_cv)

    print 'Filling submission file...'
    sub_data = utilities.read_file(submission_file, True)
    for i in range(1, len(sub_data)):
        chunk_id = sub_data[i][1]
        hour = sub_data[i][3]
        weekday = ''
        all_features = feature_extraction.get_features(chunk_id, weekday, hour,
                                                       chunk_avg,
                                                       hour_avg_by_chunk,
                                                       weekday_avg_by_chunk,
                                                       hour_avg, weekday_avg)

        for j in range(5, len(sub_data[i])):
            if sub_data[i][j] == '0':
                feature = []
                for f in all_features:
                    feature.append(f[j - 5])
                sub_data[i][j] = clfs[j - 5].predict([feature])[0]

    utilities.write_file(output_file, sub_data)
Example #37
def test_read_file():
    in_file = 'file_that_doesnt_exist'

    def file_reader(**kwargs):
        csv = kwargs['csv_reader']
        return None

    try:
        out = util.read_file(input_file=in_file, func=file_reader)
    except IOError:
        assert (True)
    else:
        assert (False)
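For comparison, the same expectation written with pytest.raises; util.read_file and its keyword arguments are taken from the test above, and the use of pytest is an assumption:

import pytest

def test_read_file_missing_file():
    # Reading a file that does not exist is expected to raise IOError.
    def file_reader(**kwargs):
        return None

    with pytest.raises(IOError):
        util.read_file(input_file='file_that_doesnt_exist', func=file_reader)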
Example #39
def analyze_candidates(solution_file, follow, followed):
    """ Analyzes the method get_candidates. """

    raw_solution = utilities.read_file(solution_file, False)
    dict_solution = {}
    for row in raw_solution:
        dict_solution[int(row[0])] = set(int(n) for n in row[1 : :])

    count_total = 0
    count_miss = 0
    for node in dict_solution:
        candidates = candidate.get_candidates(follow, followed, node)
        for n in dict_solution[node]:
            if n not in candidates:
                count_miss += 1
        count_total += len(dict_solution[node])

    print 'count_total = %d, count_miss = %d' %(
        count_total, count_miss)
Example #40
def time_series(training_file, submission_file, output_file):
    data = utilities.read_file(training_file, True)
    first_line = data[0]
    data = data[1 : :]
    data = preprocess.fill_NAs(data)

    (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk,
     hour_avg, weekday_avg) = feature_extraction.get_avg_maps(data)

    clf_map = regression.linear_regression_2(data)

    print 'Filling submission file...'
    chunk_map = utilities.get_chunk_map(data, 1)
    sub_data = utilities.read_file(submission_file, True)

    positions = [1, 2, 3, 4, 5, 10, 17, 24, 48, 72]
    for i in range(1, len(sub_data)):
        chunk_id = sub_data[i][1]
        hour = sub_data[i][3]
        pos = positions[(i - 1) % 10]
        for j in range(5, len(sub_data[i])):
            target = j - 5
            if sub_data[i][j] == '0':
                if not chunk_id in chunk_map:
                    sub_data[i][j] = hour_avg[hour][target]
                else:
                    data_in_chunk = chunk_map[chunk_id]
                    start = len(data_in_chunk) - 24
                    t = len(data_in_chunk[0]) - 39 + target
                    features = []
                    prev_hour = 0
                    for k in range(start, len(data_in_chunk)):
                        features.append(float(data_in_chunk[k][t]))
                        if data_in_chunk[k][5] == hour:
                            prev_hour = float(data_in_chunk[k][t])

                    features.append(prev_hour)

                    # Binary hour features.
                    for h in range(24):
                        if h == int(hour):
                            features.append(1)
                        else:
                            features.append(0)

                    # Binary month features.
                    month = int(sub_data[i][4])
                    for m in range(1, 13):
                        if m == month:
                            features.append(1)
                        else:
                            features.append(0)

                    # Weather features.
                    tmp_length = len(data_in_chunk)
                    for k in range(6, 56):
                        features.append(float(data_in_chunk[tmp_length - 1][k]))
                    for k in range(6, 56):
                        features.append(float(data_in_chunk[tmp_length - 2][k]))

                    sub_data[i][j] = \
                        clf_map[(target, pos)].predict([features])[0]

    utilities.write_file(output_file, sub_data)