Example No. 1
def download(course, item):
    """
    Download announcement JSON.
    :param course: A Course object.
    :param item: {
        "close_time": 2147483647,
        "user_id": 1069689,
        "open_time": 1411654451,
        "title": "Coursera",
        "deleted": 0,
        "email_announcements": "email_sent",
        "section_id": "14",
        "order": "6",
        "item_type": "announcement",
        "__type": "announcement",
        "published": 1,
        "item_id": "39",
        "message": "Hello, everyone.",
        "uid": "announcement39",
        "id": 39,
        "icon": ""
    }
    :return: None.
    """
    path = '{}/announcement/{}.json'
    path = path.format(course.get_folder(), item['item_id'])

    util.make_folder(path, True)
    util.write_json(path, item)

    content = util.read_file(path)
    content = util.remove_coursera_bad_formats(content)

    util.write_file(path, content)
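The util helpers these examples rely on (write_json, make_folder, read_file, and so on) are not shown anywhere in this listing. A minimal sketch of what the two used above might look like, assuming write_json(path, data) takes the path first as in this example (note that several later examples call write_json with the arguments reversed, data first):

import json
import os


# Hypothetical stand-ins for the util helpers assumed by the example above;
# the real module may differ.
def make_folder(path, is_file=False):
    """Create the containing folder of `path` (or `path` itself) if missing."""
    folder = os.path.dirname(path) if is_file else path
    if folder and not os.path.isdir(folder):
        os.makedirs(folder)


def write_json(path, data):
    """Serialize `data` to `path` as pretty-printed UTF-8 JSON."""
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=4)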
Example No. 2
def predict(num=1):  # num is the number of answer sentences to extract
    system('.\svm_rank_windows\svm_rank_classify.exe %s %s %s' %
           (test_feature_path, model_path, test_predict_path))
    with open(test_feature_path, 'r',
              encoding='utf-8') as f1, open(test_predict_path,
                                            'r',
                                            encoding='utf-8') as f2:
        labels = {}
        for line1, line2 in zip(f1, f2):
            if len(line1) == 1:
                break
            qid = int(line1.split()[1].split(':')[1])
            if qid not in labels:
                labels[qid] = []
            labels[qid].append((float(line2.strip()), len(labels[qid])))
        seg_passages, res_lst = load_seg_passages(), read_json(test_path)
        for item in res_lst:  # iterate over each query record in the file
            qid, pid, q_words = item['qid'], item['pid'], item['question']
            rank_lst, seg_passage = sorted(
                labels[qid], key=lambda val: val[0],
                reverse=True), seg_passages[str(pid)]
            item['answer_sentence'] = [
                seg_passage[rank[1]] for rank in rank_lst[:num]
            ]  # extract the answer sentences
        write_json(test_ans_path, res_lst)
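Example No. 2 assumes the feature file follows the SVMlight/SVMrank line format, where the second token of each line is "qid:<id>". A tiny self-contained sketch of the extraction it performs (the line itself is made up):

# Hypothetical SVM-rank feature line: <label> qid:<id> <feature>:<value> ...
line = '0 qid:3 1:0.125 2:0.500'

# Same extraction as in the example above: the second token carries the query id.
qid = int(line.split()[1].split(':')[1])
assert qid == 3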
Example No. 4
def make_examples_simple(data_dir, n_users, negative_examples_per_user=10):
    G = snap.LoadEdgeList(snap.PUNGraph, data_dir + 'graph.txt', 0, 1)
    new_edges = defaultdict(dict)
    with open(data_dir + 'new_edges.txt') as f:
        for line in f:
            u, b = map(int, line.split())
            new_edges[u][b] = 1

    businesses = map(int, util.load_json(data_dir + 'business.json').keys())
    examples = defaultdict(dict)
    users = random.sample([NI.GetId() for NI in G.Nodes()], n_users)
    for u in users:
        examples[u] = new_edges[u]
        for i in range(negative_examples_per_user):
            b = random.choice(businesses)
            examples[u][b] = 0

    p, n = 0, 0
    for u in examples:
        for b in examples[u]:
            p += examples[u][b]
            n += 1 - examples[u][b]
    print "Positive:", p
    print "Negative:", n
    print "Data skew:", p / float(p + n)
    print "Sampling rate:", negative_examples_per_user / float(len(businesses))

    print "Writing examples..."
    util.write_json(examples, data_dir + 'examples_simple.json')
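The example above is Python 2 (print statements, map returning a list). A caveat if porting to Python 3: map() returns a lazy iterator there, so random.choice(businesses) would fail; the keys have to be materialized first. A small illustrative sketch:

import random

keys = {'10': {}, '42': {}}.keys()   # stand-in for util.load_json(...).keys()
ids = [int(k) for k in keys]         # materialize before sampling; map() alone is lazy in Python 3
print(random.choice(ids))            # random.choice needs a sequence, not an iterator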
Example No. 5
def write_pr_matches(outfile):
    data = {}
    entries = get_names_from_elastic()

    if entries:
        for elem in entries:
                sdn_id = elem['_id']
                name = elem['_source']['primary_display_name']
                data[sdn_id] = []

                result = query_pr_content(name)
                for entry in result['hits']['hits']:
                    pr_elem = {
                        'pr_id': entry['_id'],
                        'link': entry['_source']['link'],
                        'date': entry['_source']['date'],
                        'title': entry['_source']['title'],
                    }
                    data[sdn_id].append(pr_elem)
                    other_dates = query_pr_date(entry['_source']['date'])
                    for date_entry in other_dates['hits']['hits']:
                        if (date_entry['_id'] != entry['_id'] and date_entry['_source']['link'] in entry['_source']['related']):
                            new_elem = {
                                'pr_id': date_entry['_id'],
                                'link': date_entry['_source']['link'],
                                'date': date_entry['_source']['date'],
                                'title': date_entry['_source']['title'],
                            }
                            data[sdn_id].append(new_elem)

    util.write_json(outfile, data)
Example No. 6
def main():
    train_messages, train_labels = util.load_spam_dataset(
        '../data/ds6_train.tsv')
    val_messages, val_labels = util.load_spam_dataset('../data/ds6_val.tsv')
    test_messages, test_labels = util.load_spam_dataset('../data/ds6_test.tsv')

    dictionary = create_dictionary(train_messages)

    util.write_json('./output/p06_dictionary', dictionary)

    train_matrix = transform_text(train_messages, dictionary)

    np.savetxt('./output/p06_sample_train_matrix', train_matrix[:100, :])

    val_matrix = transform_text(val_messages, dictionary)
    test_matrix = transform_text(test_messages, dictionary)

    naive_bayes_model = fit_naive_bayes_model(train_matrix, train_labels)

    naive_bayes_predictions = predict_from_naive_bayes_model(
        naive_bayes_model, test_matrix)

    np.savetxt('./output/p06_naive_bayes_predictions', naive_bayes_predictions)

    naive_bayes_accuracy = np.mean(naive_bayes_predictions == test_labels)

    print('Naive Bayes had an accuracy of {} on the testing set'.format(
        naive_bayes_accuracy))

    top_5_words = get_top_five_naive_bayes_words(naive_bayes_model, dictionary)

    print('The top 5 indicative words for Naive Bayes are: ', top_5_words)

    util.write_json('./output/p06_top_indicative_words', top_5_words)
    """ 
Example No. 7
def main():
    train_tweets, val_tweets, test_tweets, train_labels, val_labels, test_labels = load_dataset("final_data/compiled_data.csv")
    dictionary = create_dictionary(train_tweets)
    util.write_json('./output/dictionary', dictionary)
    train_matrix = transform_text(train_tweets, dictionary)
    val_matrix = transform_text(val_tweets, dictionary)
    test_matrix = transform_text(test_tweets, dictionary)

    naive_bayes_model = fit_naive_bayes_model(train_matrix, train_labels)
    naive_bayes_predictions = predict_from_naive_bayes_model(naive_bayes_model, test_matrix)
    naive_bayes_accuracy = np.mean(naive_bayes_predictions == test_labels)
    print("naive_bayes_results: ")
    unique, counts = np.unique(naive_bayes_predictions, return_counts=True)
    print(dict(zip(unique, counts)))
    print("test_labels: " )
    unique, counts = np.unique(test_labels, return_counts=True)
    print(dict(zip(unique, counts)))

    print('Naive Bayes had an accuracy of {} on the testing set'.format(naive_bayes_accuracy))
    top_5_words = get_top_five_naive_bayes_words(naive_bayes_model, dictionary)
    print('The top 5 indicative words for Naive Bayes are: ', top_5_words)

    optimal_radius = compute_best_svm_radius(train_matrix, train_labels, val_matrix, val_labels, [0.01, 0.1, 1, 10])
    util.write_json('./output/p06_optimal_radius', optimal_radius)
    print('The optimal SVM radius was {}'.format(optimal_radius))
    svm_predictions = svm.train_and_predict_svm(train_matrix, train_labels, test_matrix, optimal_radius)
    svm_accuracy = np.mean(svm_predictions == test_labels)
    print('The SVM model had an accuracy of {} on the testing set'.format(svm_accuracy))
Example No. 8
def run():
    results = {}
    index = get_index()
    datetimes = list(index.keys())
    for date in datetimes:
        hn_html_soup = BeautifulSoup(open(index[date][HN_KEY], "r"))
        dt_html_soup = BeautifulSoup(open(index[date][DT_KEY], "r"))
        dt_html_soup.find_all()
        hn_titles = extract_titles(hn_html_soup)
        hn_subtexts = extract_subtexts(hn_html_soup)
        dt_titles = extract_titles(dt_html_soup)
        dt_subtexts = extract_subtexts(dt_html_soup)
        results[date] = {
            "HackerNews": {
                "links": extract_links(hn_titles),
                "scores": extract_scores(hn_subtexts),
                "users": extract_users(hn_subtexts),
                "post_ages": extract_post_ages(hn_subtexts),
                "nr_of_comments": extract_nr_of_comments(hn_subtexts)
            },
            "DataTau": {
                "links": extract_links(dt_titles),
                "scores": extract_scores(dt_subtexts),
                "users": extract_users(dt_subtexts),
                "post_ages": extract_post_ages(dt_subtexts),
                "nr_of_comments": extract_nr_of_comments(dt_subtexts)
            }
        }
    write_json(results, "data/aggregation_results.json")
Example No. 9
    def post(self):
        """POST handler for gallery albums.

        URL pattern: /albums
        POST data must contain album metadata: 'name'.

        Returns 201 CREATED with JSON data structure describing new album.
        Returns Content-type: application/json.
        Also returns Location header pointing to API URL for album details.
        
        Include 'wrapjson' parameter in POST to wrap returned JSON in
        a <textarea>. This also changes the returned Content-type to text/html.

        If request is poorly formatted returns 400 BAD REQUEST.

        Returns 401 UNAUTHORIZED to all calls if authorization fails.
        """
        try:
            data = dict(((str(k), v) for k, v in self.request.POST.items()))
            album = Album(album_id=config.ALBUM_ID_GENERATOR(),
                          **data)
        except:
            data = {}
            self.error(400)
        else:
            if not config.DEMO_MODE:
                album.put() 

            data = album.to_dict()
            self.response.headers['Location'] = data['url']
            self.response.set_status(201)
        
        write_json(self, data, wrapjson='wrapjson' in self.request.POST)
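Going by the docstring above, a client POSTs form data containing at least 'name' and reads the Location header of the 201 response. A hedged usage sketch with the requests library; the base URL is purely illustrative:

import requests

BASE = 'http://localhost:8080'  # hypothetical host serving the gallery API

resp = requests.post(BASE + '/albums', data={'name': 'Holiday 2014'})
if resp.status_code == 201:
    print('New album at:', resp.headers['Location'])
    print(resp.json())  # JSON structure describing the new album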
Example No. 10
def export_sequence_json(T, path, prefix):
    from Bio import SeqIO
    plain_export = 0.99
    indent = None

    elems = {'root': {}}
    for node in T.find_clades():
        elems[node.clade] = {}

    for gene, aln_fname in get_genes_and_alignments(path, tree=True):
        seqs = {}
        for seq in SeqIO.parse(aln_fname, 'fasta'):
            seqs[seq.name] = seq

        root_seq = seqs[T.root.name]
        elems['root'][gene] = "".join(root_seq)
        for node in T.find_clades():
            nseq = seqs[node.name]
            if hasattr(node, "clade"):
                differences = {
                    pos: state
                    for pos, (
                        state,
                        ancstate) in enumerate(zip(nseq, elems['root'][gene]))
                    if state != ancstate
                }
                if len(differences) <= plain_export * len(seq):
                    elems[node.clade][gene] = differences
                else:
                    elems[node.clade][gene] = seq

    fname = sequence_json(path, prefix)
    write_json(elems, fname, indent=indent)
Example No. 11
def calc_vsm_perform(similarity_func=calc_inner_product):
    if similarity_func.__name__ not in [
            calc_cosine.__name__, calc_inner_product.__name__,
            calc_jaccard.__name__
    ]:
        print('Invalid similarity function supplied...')
        return
    print('Loading preprocessed training-set file...')
    if file_exists(preprocess_path):
        res_lst = read_json(preprocess_path)  # load the preprocessed training set
    else:
        res_lst = read_json(train_path)  # load the raw training set
        for question in res_lst:
            question['question'] = seg_line(question['question'])
        write_json(preprocess_path, res_lst)

    print('Computing similarities...')
    res = {}
    for item in res_lst:
        q_words, pid = {}, item['pid']
        for word in item['question']:
            q_words[word] = q_words.get(word, 0) + 1
        query_dic = {
            word: idf.get(word, 0) * (1 + log(tf, 10))
            for word, tf in q_words.items()
        }
        pred_pid = similarity_func(query_dic)[0][0]
        res[item['qid']] = int(pred_pid) == pid
        print('Progress: %.2f%%' % (len(res) / len(res_lst) * 100))
    return len(list(filter(lambda val: res[val], res))) / len(res)
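The query vector above weights each term as idf * (1 + log10(tf)). A tiny worked example of that formula, with an illustrative idf table:

from math import log

idf = {'word': 2.0}    # illustrative idf value
q_words = {'word': 3}  # the term occurs three times in the query
query_dic = {w: idf.get(w, 0) * (1 + log(tf, 10)) for w, tf in q_words.items()}
print(query_dic)  # {'word': 2.954...}: 2.0 * (1 + log10(3))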
Example No. 12
def comment(repo_dir, report):
    good = [] # All commits on all branches
    bad = [] # No commits
    ugly = [] # Partial uplift
    good, bad, ugly = classify_gbu(report)
    failed_bugs = []

    def x(bug_id):
        del report[bug_id]
        util.write_json(uplift.uplift_report_file, report)

    for i, j in (good, good_bug_comment), (bad, bad_bug_comment), (ugly, ugly_bug_comment):
        for bug_id in i:
            print "Commenting on bug %s" % bug_id
            try:
                j(repo_dir, bug_id, report[bug_id])
                x(bug_id)
            except FailedToComment:
                failed_bugs.append(bug_id)
                
    if len(failed_bugs) > 0:
        filename = os.path.abspath('failed_comments_%s.json' % util.time_str())
        print "The following bugs had commenting failures"
        print util.e_join(failed_bugs)
        print "Creating a file to use with the 'uplift comments' file to try just these."
        print "Fix the issue then run: uplift comments %s" % filename
        util.write_json(filename, report)
Example No. 13
def run():
    npcs = {}

    npc_pages = api.query_category("Monsters")
    for name, page in npc_pages.items():
        if name.startswith("Category:"):
            continue

        try:
            code = mw.parse(page, skip_style_tags=True)

            for (vid, version) in util.each_version("Infobox Monster", code):
                doc = util.get_doc_for_id_string(name + str(vid), version,
                                                 npcs)
                if doc is None:
                    continue
                util.copy("name", doc, version, lambda x: x)
                for key in ["hitpoints", "combat"]:
                    try:
                        util.copy(key, doc, version, lambda x: int(x))
                    except ValueError:
                        print("NPC {} has an non integer {}".format(name, key))

        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            print("NPC {} failed:".format(name))
            traceback.print_exc()

    for npcId in copy.copy(npcs):
        npc = npcs[npcId]
        if 'combat' not in npc:
            del npcs[npcId]

    util.write_json("npcs.json", "npcs.min.json", npcs)
Example No. 14
File: forum.py Project: kq2/Ricin
def download_thread(course, threads_folder, thread_id, page=1, post_id=None):
    """
    Download a thread.
    """
    # Download 1st page
    url = '{}/api/forum/threads/{}'.format(course.get_url(), thread_id)
    if post_id:
        url = '{}?post_id={}&position=after'.format(url, post_id)

    path = '{}/{}/{}.json'.format(threads_folder, thread_id, page)
    util.download(url, path, course.get_cookie_file())

    thread = util.read_json(path)
    download_images(course, threads_folder, thread)

    util.write_json(path, thread)

    # Download rest pages
    page = thread['start_page']
    num_page = thread['num_pages']

    if page < num_page:
        page += 1
        print 'thread page {}/{}'.format(page, num_page)

        post_id = get_next_post_id(thread['posts'])
        if post_id:
            download_thread(course, threads_folder, thread_id, page, post_id)
Example No. 15
def build_uplift_requirements(repo_dir):
    if os.path.exists(requirements_file) and util.ask_yn("Found existing requirements. Should they be used?"):
        bug_info = util.read_json(requirements_file)
    else:
        bug_info = {}
        enabled_branches = c.read_value('repository.enabled_branches')
        all_queries = c.read_value('queries')
        queries = []
        for branch in enabled_branches:
            queries.extend(all_queries[branch])

        bugs = [x for x in find_bugs(queries) if not is_skipable(x)]
        print "Fetching bug data"
        for bug_id in bugs:
            if is_skipable(bug_id):
                continue
            bug = bzapi.fetch_complete_bug(bug_id)
            print "+",
            needed_on = branch_logic.needed_on_branches(bug)
            if len(needed_on) == 0:
                continue
            b = bug_info[bug_id] = {}
            b['needed_on'] = needed_on
            b['already_fixed_on'] = branch_logic.fixed_on_branches(bug)
            b['summary'] = bug['summary']
        print "\nFinished fetching bug data"
        util.write_json(requirements_file, bug_info)
    return bug_info
Example No. 16
File: forum.py Project: kq2/Ricin
def find_threads(course, forum_folder, forum_id):
    """
    Find all threads in current forum.
    Note: forum 0 has every thread!
    """
    # download the 1st page of given forum
    query = 'sort=firstposted&page=1'
    url = '{}/api/forum/forums/{}/threads?{}'
    url = url.format(course.get_url(), forum_id, query)
    path = forum_folder + '/temp.json'
    util.download(url, path, course.get_cookie_file())

    # download a huge page with all threads
    forum = util.read_json(path)
    num_threads = forum['total_threads']
    url += '&page_size={}'.format(num_threads)
    util.download(url, path, course.get_cookie_file())

    # add each thread's id to forum info
    threads = util.read_json(path)['threads']
    util.remove(path)

    path = forum_folder + '/info.json'
    forum = util.read_json(path)

    forum_threads = []
    for thread in reversed(threads):
        forum_threads.append({'id': thread['id']})

    forum['num_threads'] = num_threads
    forum['threads'] = forum_threads

    util.write_json(path, forum)
def run_random_walks(data_dir, weight_edges=False):
    print "Loading data and building transition matrix..."
    examples = util.load_json('./data/' + data_dir + '/examples.json')
    G = nx.read_edgelist('./data/' + data_dir + '/graph.txt', nodetype=int)
    if weight_edges:
        reviews = util.load_json('./data/' + data_dir + '/review.json')
        end_date = datetime.date(2012, 1, 1) if data_dir == 'train' else datetime.date(2013, 1, 1)
        edges = G.edges()
        for e in util.logged_loop(edges, util.LoopLogger(20000, len(edges), True)):
            n1, n2 = str(e[0]), str(e[1])
            if n1 not in reviews or n2 not in reviews[n1]:
                n1, n2 = n2, n1
            G[e[0]][e[1]]['weight'] = 1.0 / ((end_date - get_date(reviews[n1][n2][0])).days + 90)
        del reviews  # save some memory

    adjacency_matrix = nx.adjacency_matrix(G)
    inverse_degree_matrix = sparse.diags([[1.0 / adjacency_matrix.getrow(i).sum()
                                           for i in range(adjacency_matrix.shape[0])]], [0])
    transition_matrix = inverse_degree_matrix.dot(adjacency_matrix)

    print "Running random walks..."
    for u in util.logged_loop(examples, util.LoopLogger(10, len(examples), True)):
        p = run_random_walk(transition_matrix, int(u), 10).todense()
        for b in examples[u]:
            examples[u][b] = p[0, int(b)]

    util.write_json(examples, './data/' + data_dir
                    + ('/weighted_random_walks.json' if weight_edges else '/random_walks.json'))
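The transition matrix above is D^-1 * A, i.e. the adjacency matrix with each row normalized by the node's degree so that rows sum to 1. A small self-contained sketch of the same construction on a 3-node path graph:

import numpy as np
from scipy import sparse

# Illustrative 3-node path graph 0-1-2.
A = sparse.csr_matrix(np.array([[0., 1., 0.],
                                [1., 0., 1.],
                                [0., 1., 0.]]))
D_inv = sparse.diags([[1.0 / A.getrow(i).sum() for i in range(A.shape[0])]], [0])
T = D_inv.dot(A)
print(T.toarray())  # every row sums to 1 (row-stochastic)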
Example No. 18
def set_cli_defaults(code_root, cli_config, install_config):
    """Write install-time configuration options to the cli.jsonc file used to
    set run-time default values.
    """
    def _set_cli_default(template, name, default):
        template[name] = default

    in_path = os.path.join(code_root, cli_config['config_in'])
    out_path = os.path.join(code_root, cli_config['config_out'])
    print("Writing default settings to {}".format(out_path))
    try:
        cli_template = util.read_json(in_path)
    except Exception as exc:
        fatal_exception_handler(exc,
                                "ERROR: Couldn't read {}.".format(in_path))
    for key in cli_config['default_keys']:
        try:
            _set_cli_default(cli_template, key, install_config[key])
        except Exception as exc:
            fatal_exception_handler(exc, "ERROR: {} not set".format(key))
    if os.path.exists(out_path):
        print("{} exists; overwriting".format(out_path))
        os.remove(out_path)
    try:
        util.write_json(cli_template, out_path, sort_keys=False)
    except Exception as exc:
        fatal_exception_handler(exc,
                                "ERROR: Couldn't write {}.".format(out_path))
Example No. 19
def svd_user_business(data_dir, k=50):
    print "Loading data and building user-business matrix..."
    users = util.load_json('./data/' + data_dir + '/user.json').keys()
    businesses = util.load_json('./data/' + data_dir + '/business.json').keys()
    examples = util.load_json('./data/' + data_dir + '/examples.json')

    user_to_row = dict(zip(users, range(len(users))))
    business_to_column = dict(zip(businesses, range(len(businesses))))

    user_business_matrix = sparse.lil_matrix((len(users), len(businesses)), dtype=float)
    with open('./data/' + data_dir + '/graph.txt') as f:
        for line in f:
            u, b = line.split()
            user_business_matrix[user_to_row[u], business_to_column[b]] = 1
    user_business_matrix = sparse.csr_matrix(user_business_matrix)

    print "Computing singular value decomposition..."
    u, s, vt = sparse.linalg.svds(user_business_matrix, k=k)
    us = u * s

    print "Writing results..."
    for u in examples:
        for b in examples[u]:
            examples[u][b] = np.dot(us[user_to_row[u], :], vt[:, business_to_column[b]])
    util.write_json(examples, './data/' + data_dir + '/svd.json')
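The score written above is the (user, business) entry of the rank-k reconstruction U·S·Vt of the user-business matrix. A minimal sketch of that idea on a toy matrix (not the project's data):

import numpy as np
from scipy import sparse
from scipy.sparse.linalg import svds

M = sparse.csr_matrix(np.array([[1., 0., 1., 0.],
                                [0., 1., 1., 0.],
                                [1., 1., 0., 1.]]))
u, s, vt = svds(M, k=2)
us = u * s
print(np.dot(us[0, :], vt[:, 2]))  # rank-2 approximation of M[0, 2]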
Example No. 20
def save_current_version(modules_iter):
  current_version={}
  for folder, conf in modules_iter:
    HEAD = git_command(folder, ["rev-parse", "HEAD"])
    HEAD = HEAD.strip()
    current_version[conf["name"]] = HEAD

  write_json(".version.snapshot.json", current_version)
Example No. 21
def main(args):
    data_file, result_file, csv_file = args[0], args[1], args[2]
    data = util.load_json(data_file)
    result = count_to_freq(data)
    util.write_json(result_file, result)
    # save .csv file
    text = make_csv(result['data'])
    util.write_replace(csv_file, text.encode('utf-8'))
Example No. 22
File: video.py Project: kq2/Ricin
def _download_old_quizzes(course, item, path):
    """
    Download old version in-video quizzes.
    """
    url = '{}/admin/quiz/quiz_load?quiz_id={}'
    url = url.format(course.get_url(), item['quiz']['parent_id'])
    util.download(url, path, course.get_cookie_file())
    util.write_json(path, util.read_json(path))
Example No. 23
def save_current_version(modules_iter):
    current_version = {}
    for folder, conf in modules_iter:
        HEAD = git_command(folder, ["rev-parse", "HEAD"])
        HEAD = HEAD.strip()
        current_version[conf["name"]] = HEAD

    write_json(".version.snapshot.json", current_version)
Example No. 24
def uplift(repo_dir, gaia_url, requirements):
    # Setup stuff
    t=util.time_start()
    print "Updating Gaia"
    git.create_gaia(repo_dir, gaia_url) # This is sadly broken
    print "Created Gaia in %0.2f seconds" % util.time_end(t)

    # Determining what needs to be uplifted
    with_commits = {}
    for bug_id in requirements.keys():
        if requirements[bug_id].has_key('commits'):
            with_commits[bug_id] = requirements[bug_id]

    ordered_commits = order_commits(repo_dir, with_commits)

    uplift = dict([(x, {}) for x in ordered_commits])

    # Uplifting
    for commit in ordered_commits:
        needed_on = []
        for bug_id in with_commits.keys():
            if commit in with_commits[bug_id]['commits']:
                for i in with_commits[bug_id]['needed_on']:
                    if not i in needed_on:
                        needed_on.append(i)
        print "\n", "="*80
        print "Attempting to uplift %s commit to %s" % (commit, util.e_join(needed_on))
        uplift[commit]['needed_on'] = needed_on
        result = uplift_commit(repo_dir, commit, needed_on)
        print "Sucess on %s" % util.e_join(result['success'].keys())
        print "Failure on %s" % util.e_join(result['failure'])
        uplift[commit]['uplift_status'] = result

    uplift_report = copy.deepcopy(with_commits)

    # Determining which commits belong to which bugs
    for bug_id in uplift_report.keys():
        successful_branches = []
        failed_branches = []
        for commit in git.sort_commits(repo_dir, uplift_report[bug_id]['commits'], 'master'):
            if commit in uplift.keys():
                if not uplift_report[bug_id].has_key('uplift_status'):
                    uplift_report[bug_id]['uplift_status'] = {}
                u = uplift_report[bug_id]['uplift_status']
                u[commit] = copy.deepcopy(uplift[commit]['uplift_status'])
                failed_branches.extend([x for x in u[commit]['failure'] if x not in failed_branches])
                successful_branches.extend([x for x in u[commit]['success'].keys() if x not in successful_branches])
        # Because we might have multiple commits, we want to make sure that the list of successful branches
        # includes only those with *no* failing uplifts
        for i in range(len(successful_branches) - 1, -1, -1):
            if successful_branches[i] in failed_branches:
                del successful_branches[i]
        uplift_report[bug_id]['flags_to_set'] = branch_logic.flags_to_set(successful_branches)

    util.write_json(uplift_dated_file, uplift_report)
    util.write_json(uplift_report_file, uplift_report)
    return uplift_report
def save_result(filename, data):
    o = {
        '_last_update': last_update(),
        '_meta': {
            'char': len(list(data.keys())),
            'count': sum(list(data.values())),
        },
        'data': data,
    }
    util.write_json(filename, o)
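For reference, the _meta block above simply records how many distinct keys the mapping has and the total of its counts; an illustrative run, assuming data maps characters to occurrence counts:

data = {'a': 3, 'b': 1}
meta = {'char': len(list(data.keys())), 'count': sum(list(data.values()))}
print(meta)  # {'char': 2, 'count': 4}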
Example No. 26
    def _backup_config_file(self, config):
        """Record settings in file variab_dir/config_save.json for rerunning
        """
        out_file = os.path.join(self.MODEL_WK_DIR, 'config_save.json')
        if not self.file_overwrite:
            out_file, _ = util_mdtf.bump_version(out_file)
        elif os.path.exists(out_file):
            print('Overwriting {}.'.format(out_file))
        util.write_json(config.config.toDict(), out_file)
        return out_file
Example No. 27
def create_package(folder, config, table, tag=None, github=True):
  """
    Creates 'package.json' based on 'module.json'.
  """

  replace_deps(config, table, "dependencies", tag, github)
  replace_deps(config, table, "devDependencies", tag, github)

  filename = os.path.join(folder, "package.json")
  print("Writing %s"%(filename))
  write_json(filename, config)
Example No. 28
def create_package(folder, config, table, tag=None, github=True):
    """
    Creates 'package.json' based on 'module.json'.
  """

    replace_deps(config, table, "dependencies", tag, github)
    replace_deps(config, table, "devDependencies", tag, github)

    filename = os.path.join(folder, "package.json")
    print("Writing %s" % (filename))
    write_json(filename, config)
Example No. 29
    def get(self):
        """GET handler for gallery albums.

        URL pattern: /albums
        
        Returns 200 OK with JSON data structure containing list of albums.
        Returns Content-type: application/json.

        Returns 401 UNAUTHORIZED to all calls if authorization fails.
        """
        write_json(self, [album.to_dict() for album in Album.all()]) 
def train_test(X_train, y_train, X_test, e_test, vectorizer):
    print "Training..."
    clf = GradientBoostingClassifier(n_estimators=2000, max_depth=4)
    clf.fit(X_train, y_train)

    print "Testing..."
    probas = clf.predict_proba(X_test)[:, 1]

    scores = defaultdict(dict)
    for (u, b), p in zip(e_test, probas):
        scores[u][b] = p
    util.write_json(scores, './data/test/supervised_classifier.json')
Example No. 32
def create_branch(root_dir, config, branch_name):
  for m in config["modules"]:
    folder = os.path.join(root_dir, m["folder"])
    argv = ["branch", branch_name]
    git_command(folder, argv)
    argv = ["checkout", branch_name]
    git_command(folder, argv)

  for m in config["modules"]:
    m["branch"] = branch_name

  write_json("project.json", config)
def supervised_methods(methods):
    num_features = 14
    # train dates for make dataset: 2012-01-01 to 2012-07-01;
    # for examples it is 2011-07-01
    train = build_features("./data/train/user.json",
                           "./data/train/business.json",
                           "./data/train/examples.json",
                           "./data/train/graph.txt",
                           "./data/train/user_adamic_adar.json",
                           "./data/train/biz_adamic_adar.json",
                           "./data/train/user_cn.json",
                           "./data/train/biz_cn.json",
                           "./data/train/user_jaccard.json",
                           "./data/train/biz_jaccard.json",
                           num_features, datetime.date(2011, 7, 1))
    # test dates for the make dataset: 2013-01-01 to 2013-07-01;
    # for examples it is 2012-07-01
    test = build_features("./data/test/user.json",
                          "./data/test/business.json",
                          "./data/test/examples.json",
                          "./data/test/graph.txt",
                          "./data/test/user_adamic_adar.json",
                          "./data/test/biz_adamic_adar.json",
                          "./data/test/user_cn.json",
                          "./data/test/biz_cn.json",
                          "./data/test/user_jaccard.json",
                          "./data/test/biz_jaccard.json",
                          num_features, datetime.date(2012, 7, 1), True)

    for method in methods:
        clf=None
        if method=="RandomForest":
            clf=RandomForestClassifier(n_estimators=100,
                        max_features=num_features, oob_score=True)
        elif method=="GBM":
            clf=GradientBoostingClassifier(n_estimators=100,
                                           max_features=num_features)
        else:
            continue

        clf=clf.fit(train["features"],train["target"])
        probs=clf.predict_proba(test["features"])
        prob_json=test["probs"]
        for u in prob_json:
            for b in prob_json[u]:
                prob_json[u][b]=float(probs[prob_json[u][b]][1])

        util.write_json(prob_json,"./data/results/"+method+".json")
        with open("./data/results/"+method+"_scores.txt","w") as f:
            f.write("===feat. importance==="+str(clf.feature_importances_)+"\n")
            f.flush()
        f.close()
Example No. 34
def export_metadata_json(T, path, prefix, indent):
    print("Writing out metaprocess")
    meta_json = {}

    meta_json["virus_count"] = T.count_terminals()
    from datetime import date
    meta_json["updated"] = date.today().strftime('%Y-%m-%d')
    meta_json["author_info"] = {}
    meta_json["seq_author_map"] = {}

    # join up config color options with those in the input JSONs.
    col_opts = process.config["auspice"]["color_options"]
    if process.colors:
        for trait, col in process.colors.iteritems():
            if trait in col_opts:
                col_opts[trait]["color_map"] = col
            else:
                process.log.warn(
                    "{} in colors (input JSON) but not auspice/color_options. Ignoring"
                    .format(trait))

    meta_json["color_options"] = col_opts
    if "date_range" in process.config["auspice"]:
        meta_json["date_range"] = process.config["auspice"]["date_range"]
    if "analysisSlider" in process.config["auspice"]:
        meta_json["analysisSlider"] = process.config["auspice"][
            "analysisSlider"]
    meta_json["panels"] = process.config["auspice"]["panels"]
    meta_json["updated"] = time.strftime("X%d %b %Y").replace('X0',
                                                              'X').replace(
                                                                  'X', '')
    meta_json["title"] = process.info["title"]
    meta_json["maintainer"] = process.info["maintainer"]
    meta_json["filters"] = process.info["auspice_filters"]

    if "defaults" in process.config["auspice"]:
        meta_json["defaults"] = process.config["auspice"]["defaults"]

    try:
        from pygit2 import Repository, discover_repository
        current_working_directory = os.getcwd()
        repository_path = discover_repository(current_working_directory)
        repo = Repository(repository_path)
        commit_id = repo[repo.head.target].id
        meta_json["commit"] = str(commit_id)
    except ImportError:
        meta_json["commit"] = "unknown"
    if len(process.config["auspice"]["controls"]):
        meta_json["controls"] = process.make_control_json(
            process.config["auspice"]["controls"])
    meta_json["geo"] = process.lat_longs
    write_json(meta_json, prefix + '_meta.json')
Example No. 35
def run_random_walks(data_dir, weight_edges=False):
    print("Loading data and building transition matrix...")
    examples = util.load_json('./data/' + data_dir +
                              '/oag_examples_simple.json')
    G = nx.read_edgelist('./data/' + data_dir + '/graph.txt', nodetype=int)

    # Get all nodes, but not the edges (those need to be predicted)
    with open('./data/nid_to_id.txt', 'r') as file:
        line = file.readline()
        while line:
            keys = line.split()
            if keys[0] not in G:
                G.add_node(keys[0])
            line = file.readline()

    # Real id to substitute id
    #id_map = {}
    #count = 0
    #for n in G:
    #    id_map[n] = count
    #    count += 1

    #if weight_edges:
    #    reviews = util.load_json('./data/' + data_dir + '/review.json')
    #    end_date = datetime.date(2012, 1, 1) if data_dir == 'train' else datetime.date(2013, 1, 1)
    #    edges = G.edges()
    #    for e in util.logged_loop(edges, util.LoopLogger(20000, len(edges), True)):
    #        n1, n2 = str(e[0]), str(e[1])
    #        if n1 not in reviews or n2 not in reviews[n1]:
    #            n1, n2 = n2, n1
    #        G[e[0]][e[1]]['weight'] = 1.0 / ((end_date - get_date(reviews[n1][n2][0])).days + 90)
    #    del reviews  # save some memory

    adjacency_matrix = nx.adjacency_matrix(G)
    inverse_degree_matrix = sparse.diags([[
        1.0 / adjacency_matrix.getrow(i).sum()
        for i in range(adjacency_matrix.shape[0])
    ]], [0])
    transition_matrix = inverse_degree_matrix.dot(adjacency_matrix)

    print("Running random walks...")
    for u in util.logged_loop(examples,
                              util.LoopLogger(10, len(examples), True)):
        p = run_random_walk(transition_matrix, int(u),
                            10).todense()  #row for adj matrix
        for b in examples[u]:
            examples[u][b] = p[0, int(b)]

    util.write_json(
        examples,
        './data/' + data_dir + ('/oag_weighted_random_walks.json'
                                if weight_edges else '/oag_random_walks.json'))
def train():
    phi = get_phi(True)

    print "Loading examples..."
    Ds, Ls = {}, {}
    examples = util.load_json('./data/train/examples.json')
    us = list(examples.keys())
    random.seed(0)
    random.shuffle(us)
    for u in us:
        D, L = set(), set()
        for b in examples[u]:
            (D if examples[u][b] == 1 else L).add(int(b))
        if len(D) > MAX_POSITIVE_EDGES_PER_USER:
            D = random.sample(D, MAX_POSITIVE_EDGES_PER_USER)
        if len(L) > MAX_NEGATIVE_EDGES_PER_USER:
            L = random.sample(L, MAX_NEGATIVE_EDGES_PER_USER)
        if len(D) > 1 and len(L) > 10:
            Ds[int(u)] = list(D)
            Ls[int(u)] = list(L)
            if len(Ds) > NUM_TRAIN_USERS:
                break

    print "Setting initial conditions..."
    ps = {}
    for u in Ds:
        p = np.zeros(phi['bias'].shape[0])
        p[u] = 1.0
        ps[u] = sparse.csr_matrix(p)

    print "Training..."
    w = INITIAL_WEIGHTS
    best_loss = 100000
    for i in range(100):
        print "ITERATION " + str(i + 1) + ": base"
        base_loss, ps = run(phi, w, Ds, Ls, ps)
        if base_loss < best_loss:
            best_loss = base_loss
            util.write_json(w, './data/supervised_random_walks_weights.json')

        partials = {}
        for k in w:
            print "ITERATION " + str(i + 1) + ": " + k
            new_w = w.copy()
            new_w[k] += H
            new_loss, _ = run(phi, new_w, Ds, Ls, ps)
            partials[k] = (new_loss - base_loss) / H

            print partials[k] * LEARNING_RATE

        for (k, dwk) in partials.iteritems():
            w[k] -= LEARNING_RATE * dwk
def train():
    phi = get_phi(True)

    print "Loading examples..."
    Ds, Ls = {}, {}
    examples = util.load_json("./data/train/examples.json")
    us = list(examples.keys())
    random.seed(0)
    random.shuffle(us)
    for u in us:
        D, L = set(), set()
        for b in examples[u]:
            (D if examples[u][b] == 1 else L).add(int(b))
        if len(D) > MAX_POSITIVE_EDGES_PER_USER:
            D = random.sample(D, MAX_POSITIVE_EDGES_PER_USER)
        if len(L) > MAX_NEGATIVE_EDGES_PER_USER:
            L = random.sample(L, MAX_NEGATIVE_EDGES_PER_USER)
        if len(D) > 1 and len(L) > 10:
            Ds[int(u)] = list(D)
            Ls[int(u)] = list(L)
            if len(Ds) > NUM_TRAIN_USERS:
                break

    print "Setting initial conditions..."
    ps = {}
    for u in Ds:
        p = np.zeros(phi["bias"].shape[0])
        p[u] = 1.0
        ps[u] = sparse.csr_matrix(p)

    print "Training..."
    w = INITIAL_WEIGHTS
    best_loss = 100000
    for i in range(100):
        print "ITERATION " + str(i + 1) + ": base"
        base_loss, ps = run(phi, w, Ds, Ls, ps)
        if base_loss < best_loss:
            best_loss = base_loss
            util.write_json(w, "./data/supervised_random_walks_weights.json")

        partials = {}
        for k in w:
            print "ITERATION " + str(i + 1) + ": " + k
            new_w = w.copy()
            new_w[k] += H
            new_loss, _ = run(phi, new_w, Ds, Ls, ps)
            partials[k] = (new_loss - base_loss) / H

            print partials[k] * LEARNING_RATE

        for (k, dwk) in partials.iteritems():
            w[k] -= LEARNING_RATE * dwk
Example No. 38
File: course.py Project: kq2/Ricin
    def download_info(self):
        url = self.url
        temp = self.info_folder + '/temp.html'
        util.download(url, temp, self.cookie_file)
        page_html = util.read_file(temp)
        util.remove(temp)

        info_files = ['user.json', 'course.json', 'sidebar.json']
        matches = re.findall(r'JSON\.parse\("(.*?)"\);', page_html)
        for match, info_file in zip(matches, info_files)[1:]:
            info = util.unicode_unescape(match).replace('\\\\', '')
            path = '{}/{}'.format(self.info_folder, info_file)
            util.write_json(path, util.read_json(info, True))
Example No. 39
def predict():
    res_lst = read_json(test_select_path)
    for item in res_lst:
        item['answer'] = get_ans(item['question'], item['label'],
                                 item['answer_sentence'][0])
    res_lst.sort(key=lambda val: val['qid'])
    write_json(test_span_path, res_lst)  # result list is sorted by qid ascending; this line can be commented out
    write_json(test_answer_path, [{
        'qid': item['qid'],
        'question': ''.join(item['question']),
        'answer_pid': [item['pid']],
        'answer': item['answer']
    } for item in res_lst])
Example No. 40
def write_ofac_id_matches(infile, outfile):
    table = str.maketrans(dict.fromkeys(string.punctuation + '\n'))

    data = {}  # { sdn_id : ofac_website_id }
    ofac_name_to_id = {}  # {ofac_name : ofac_website_id}

    with open(infile) as f:
        for line in f:
            ofac_id, name = line.split('|')
            name = name.lower().translate(table).strip()
            ofac_name_to_id[name] = ofac_id
    ofac_names = ofac_name_to_id.keys()

    entries = get_names_from_elastic()

    num_not_found = 0
    for entry in entries:
        sdn_id = entry['_id']
        name = entry['_source']['primary_display_name'].lower().translate(
            table).strip()

        try:
            best_match = get_close_matches(name, ofac_names, n=1,
                                           cutoff=1.0)[0]
            ofac_website_id = ofac_name_to_id[best_match]
            data[sdn_id] = ofac_website_id
        except:
            # Try to transpose the words in a name and search for them
            found = False
            new = name
            for _ in range(name.count(' ')):
                first, space, last = new.partition(' ')
                new = last + ' ' + first
                try:
                    best_match = get_close_matches(new,
                                                   ofac_names,
                                                   n=1,
                                                   cutoff=1.0)[0]
                    ofac_website_id = ofac_name_to_id[best_match]
                    data[sdn_id] = ofac_website_id
                    found = True
                    break
                except:
                    pass
            if not found:
                num_not_found += 1
    log(
        f'{num_not_found} IDs were unable to be matched to their OFAC website counterpart',
        'warning')

    util.write_json(outfile, data)
Example No. 41
    def post(self, album_id):
        """POST handler for a gallery image.

        URL pattern: /albums/${album_id}/images
        POST data must be of type multipart/form and contain image as 'file'.
        POST data must also contain image metadata: 'name'.
        Image filename must include an extension.

        Returns 201 CREATED with JSON data structure describing new image.
        Returns Content-type: application/json.
        Also returns Location header pointing to API URL for image details.

        Include 'wrapjson' parameter in POST to wrap returned JSON in
        a <textarea>. This also changes the returned Content-type to text/html.

        If album doesn't exist, returns 404 NOT FOUND.
        If request is poorly formatted returns 400 BAD REQUEST.

        Returns 401 UNAUTHORIZED to all calls if authorization fails.
        """
        q = Album.all().filter('album_id =', album_id)
        album = q.get()
        if not album:
            return self.error(404)

        try:
            data = dict(((str(k), v) for k, v in self.request.POST.items()))
            if 'file' in data:
                data['extension'] = data['file'].filename.split('.')[-1].lower()
                if data['extension'] == data['file'].filename:
                    data['extension'] = ''
                else:
                    data['extension'] = '.' + data['extension']
                data['image_data'] = data['file'].file.read()

            image = Image(image_id=config.IMAGE_ID_GENERATOR(),
                          album=album,
                          **data)
        except:
            data = {}
            self.error(400)
        else:
            if not config.DEMO_MODE:
                image.put()

            data = image.to_dict()
            self.response.headers['Location'] = data['url']
            self.response.set_status(201)

        write_json(self, data, wrapjson='wrapjson' in self.request.POST)
Example No. 42
def supervised_methods(methods):
    num_features = 14
    #train dates for make dataset: 2012-01-01 to 2012-07-01;
    #for examples it is 2011-07-01
    train = build_features(
        "./data/train/user.json", "./data/train/business.json",
        "./data/train/examples.json", "./data/train/graph.txt",
        "./data/train/user_adamic_adar.json",
        "./data/train/biz_adamic_adar.json", "./data/train/user_cn.json",
        "./data/train/biz_cn.json", "./data/train/user_jaccard.json",
        "./data/train/biz_jaccard.json", num_features,
        datetime.date(2011, 7, 1))
    #test dates for the make dataset: 2013-01-01 to 2013-07-01;
    #for examples it is 2012-07-01
    test = build_features("./data/test/user.json", "./data/test/business.json",
                          "./data/test/examples.json", "./data/test/graph.txt",
                          "./data/test/user_adamic_adar.json",
                          "./data/test/biz_adamic_adar.json",
                          "./data/test/user_cn.json",
                          "./data/test/biz_cn.json",
                          "./data/test/user_jaccard.json",
                          "./data/test/biz_jaccard.json", num_features,
                          datetime.date(2012, 7, 1), True)

    for method in methods:
        clf = None
        if method == "RandomForest":
            clf = RandomForestClassifier(n_estimators=100,
                                         max_features=num_features,
                                         oob_score=True)
        elif method == "GBM":
            clf = GradientBoostingClassifier(n_estimators=100,
                                             max_features=num_features)
        else:
            continue

        clf = clf.fit(train["features"], train["target"])
        probs = clf.predict_proba(test["features"])
        prob_json = test["probs"]
        for u in prob_json:
            for b in prob_json[u]:
                prob_json[u][b] = float(probs[prob_json[u][b]][1])

        util.write_json(prob_json, "./data/results/" + method + ".json")
        with open("./data/results/" + method + "_scores.txt", "w") as f:
            f.write("===feat. importance===" + str(clf.feature_importances_) +
                    "\n")
            f.flush()
        f.close()
Example No. 43
def svd(data_dir, k=50):
    print "Loading data and building adjacency matrix..."
    examples = util.load_json('./data/' + data_dir + '/examples.json')
    G = nx.read_edgelist('./data/' + data_dir + '/graph.txt', nodetype=int)
    adjacency_matrix = sparse.csr_matrix(nx.adjacency_matrix(G), dtype=float)

    print "Computing singular value decomposition..."
    u, s, vt = sparse.linalg.svds(adjacency_matrix, k=k)
    us = u * s

    print "Writing results..."
    for u in examples:
        for b in examples[u]:
            examples[u][b] = np.dot(us[u, :], vt[:, b])
    util.write_json(examples, './data/' + data_dir + '/svd.json')
Example No. 44
 def run_minnpm(self, pkg_info):
     start_time = time.time()
     try:
         (tgz, pkg_target, mode_configuration) = pkg_info
         pkg_path = f'{pkg_target}/package'
         output_path = f'{pkg_path}/experiment.out'
         output_status_path = f'{pkg_path}/experiment.json'
         error_status_path = f'{pkg_path}/error.json'
         self.unpack_tarball_if_needed(tgz, pkg_target)
         with open(output_path, 'wt') as out:
             exit_code = subprocess.Popen(solve_command(mode_configuration),
                                          cwd=pkg_path,
                                          stdout=out,
                                          stderr=out).wait(self.timeout)
         duration = time.time() - start_time
         if exit_code == 0:
             write_json(output_status_path, {
                 'status': 'success',
                 'time': duration
             })
             return None
         status = self.get_npmstatus(output_path)
         if status in [
                 'ERESOLVE', 'ETARGET', 'EUNSUPPORTEDPROTOCOL',
                 'EBADPLATFORM'
         ]:
             # TODO(arjun): This is for compatibility with older data. If
             # we do a totally fresh run, can refactor to stick reason into
             # status and remove the 'cannot_install' status.
             write_json(output_status_path, {
                 'status': 'cannot_install',
                 'reason': status
             })
             return None
         write_json(error_status_path, {
             'status': 'unexpected',
             'detail': output_path
         })
         return f'Failed: {pkg_path}'
     except subprocess.TimeoutExpired:
         write_json(error_status_path, {'status': 'timeout'})
         return f'Timeout: {pkg_path}'
     except BaseException as e:
         write_json(error_status_path, {
             'status': 'unexpected',
             'detail': e.__str__()
         })
         return f'Exception: {pkg_path} {e}'
def test():
    phi = get_phi(False)
    examples = util.load_json("./data/test/examples.json")
    w = util.load_json("./data/supervised_random_walks_weights.json")

    print "Computing Q and initializing..."
    Q = get_Q(phi, w)
    ps = {}
    for u in examples:
        p = np.zeros(phi["bias"].shape[0])
        p[int(u)] = 1.0
        ps[int(u)] = sparse.csr_matrix(p)
    get_ps(Q, ps, max_iter=20, convergence_criteria=0, log=True, examples=examples)

    print "Writing..."
    util.write_json(examples, "./data/test/supervised_random_walks.json")
def test():
    phi = get_phi(False)
    examples = util.load_json('./data/test/examples.json')
    w = util.load_json('./data/supervised_random_walks_weights.json')

    print "Computing Q and initializing..."
    Q = get_Q(phi, w)
    ps = {}
    for u in examples:
        p = np.zeros(phi['bias'].shape[0])
        p[int(u)] = 1.0
        ps[int(u)] = sparse.csr_matrix(p)
    get_ps(Q, ps, max_iter=20, convergence_criteria=0, log=True, examples=examples)

    print "Writing..."
    util.write_json(examples, './data/test/supervised_random_walks.json')
Example No. 47
    def get(self, album_id, image_id, extension=None):
        """GET handler for GGB image metadata and files.

        URL pattern: /albums/${album_id}/images/${image_id}(${extension})

        If called without a file extension:
            If image exists, returns 200 OK with JSON image data structure.
            Returns Content-type: application/json.
            If image doesn't exist, returns 404 NOT FOUND.
        
        If called with a file extension:
            If image exists and has the matching extension, returns the image.
            Returned Content-type matches the image format.
            Otherwise returns 404 NOT FOUND.
       
        Returns 401 UNAUTHORIZED to all calls if authorization fails.
        """
        q = Album.all().filter('album_id =', album_id)
        album = q.get()
        if not album:
            return self.error(404)

        q = Image.all().filter('album =', album).filter('image_id =', image_id)
        image = q.get()
        if not image:
            return self.error(404)

        if not extension:
            data = image.to_dict()
            return write_json(self, image.to_dict())
        
        if extension != image.extension:
            return self.error(404)
   
        write_image(self, image.image_data, image.extension)
Example No. 48
def predict(similarity_func=calc_inner_product
            ):  # predict on the test set; vsm_init() must be called before this function.
    if similarity_func.__name__ not in [
            calc_cosine.__name__, calc_inner_product.__name__,
            calc_jaccard.__name__
    ]:
        print('Invalid similarity function supplied...')
        return
    test_lst = read_json(test_path)
    for q_item in test_lst:
        q_item['question'] = seg_line(q_item['question'])  # word segmentation
        q_item['pid'] = int(
            similarity_func(
                {word: idf.get(word, 0)
                 for word in q_item['question']})[0][0])
    write_json(test_predict_path, test_lst)
Example No. 49
def get_nutrient_df(index):
    filename = "ingredient_{}.csv".format(str(index))
    ing_lst = pd.read_csv(filename).key.to_list()
    appid, key = ID[index % 5]
    headers = {
        "Content-Type": "application/json",
        "x-app-id": appid,
        "x-app-key": key,
        "x-remote-user-id": '0'
    }
    ing_dict = dict()
    for ing in ing_lst:
        ing_dict[ing] = get_nutrient(ing, headers)

    output = str(index) + ".json"
    util.write_json(ing_dict, output)
Example No. 50
def read_pokemonid():
    # data -> http://www.pokemon.com/us/pokedex/
    list_pokemons = []
    pokemons = {}

    with open("id.txt", 'r', encoding='utf-8') as datafile:
        for line in datafile:
            if line != '\n':
                pokemon = {}
                temp = line.replace('\n', '').split(' ')
                list_pokemons.append([temp[0], int(temp[1])])

        for i, pokemon in enumerate(list_pokemons):
            pokemons[i + 1] = pokemon[0]

        util.write_json(pokemons, 'id.json')
Example No. 51
def push(repo_dir):
    branches = c.read_value('repository.enabled_branches')
    preview_push_info = git.push(repo_dir, remote="origin", branches=branches, dry_run=True)
    print "If you push, you'd be pushing: "
    _display_push_info(preview_push_info)
    if not util.ask_yn('Do you wish to push?'):
        return None
    for i in range(5):
        try:
            rv = git.push(repo_dir, remote="origin", branches=branches, dry_run=False)
            util.write_json(push_info_file, rv)
            print "Push attempt %d worked" % int(i+1)
            return rv
        except:
            print "Push attempt %d failed" % int(i+1)
    raise git.PushFailure("remote %s branches %s" % ("origin", util.e_join(branches)))
Example No. 52
def for_all_bugs(repo_dir, requirements, upstream="master"):
    # Let's see if we have any commits in the req file.
    any_bug_has_commits = False
    bugs_without_commits = []
    for bug_id in requirements:
        if requirements[bug_id].has_key('commits'):
            if len(requirements[bug_id]['commits']) > 0:
                any_bug_has_commits = True
        else:
            bugs_without_commits.append(bug_id)

    if any_bug_has_commits:
        print "Some bugs in this requirements file already have commits."
        # reuse is use the existing commits, don't ask for more.
        # add is use the existing commits for bugs that have no commits, ignore others
        # delete will remove the commits from the requirements dictionary
        prompt = "Enter 'reuse', 'add' or 'delete': "
        user_input = raw_input(prompt).strip()
        while user_input not in ('reuse', 'add', 'delete'):
            user_input = raw_input(prompt).strip()
        
        if user_input == 'reuse':
            bugs_to_find = [] # just use what's in the file
        elif user_input == 'add':
            bugs_to_find = bugs_without_commits # Only ask for commits for commit-less bugs
        elif user_input == 'delete':
            # Delete the commits that are in the requirements file
            for bug_id in requirements.keys():
                if requirements[bug_id].has_key('commits'):
                    del requirements[bug_id]['commits']
            util.write_json(uplift.requirements_file, requirements)
            bugs_to_find = requirements.keys()
        else:
            raise Exception("Huh?")
    else:
        bugs_to_find = requirements.keys()

    pruned_bugs_to_find = [x for x in bugs_to_find if not uplift.is_skipable(x)]
    j=0
    for bug_id in sorted(pruned_bugs_to_find):
        j+=1
        print "=" * 80
        print "Bug %d of %d" % (j, len(pruned_bugs_to_find))
        bug = bzapi.fetch_complete_bug(bug_id, cache_ok=True)
        requirements[bug_id]['commits'] = for_one_bug(repo_dir, bug_id, bug, upstream)
        util.write_json(uplift.requirements_file, requirements)
    return requirements
Example No. 53
    def get(self, album_id):
        """GET handler for a particular gallery album.

        URL pattern: /albums/${album_id}
        
        If album exists, returns 200 OK with JSON album data structure.
        Returns Content-type: application/json.
        If album doesn't exist, returns 404 NOT FOUND.

        Returns 401 UNAUTHORIZED to all calls if authorization fails.
        """
        q = Album.all().filter('album_id =', album_id)
        album = q.get()
        if not album:
            return self.error(404)

        write_json(self, album.to_dict())
Example No. 54
File: peer.py Project: kq2/Ricin
def download(course, item):
    """
    Download peer-grading JSON.
    :param course: A Course object.
    :param item: This JSON item is directly written into saved file.
    :return: None.
    """
    path = "{}/peer_assessment/{}.json"
    path = path.format(course.get_folder(), item["item_id"])

    util.make_folder(path, True)
    util.write_json(path, item)

    content = util.read_file(path)
    content = util.remove_coursera_bad_formats(content)

    util.write_file(path, content)
Example No. 55
def increment_version(folder, config, level):
  # - iterate through all projects
  # - read the VERSION file and compare to previous version (given)
  # - if equal, automatically increase the version (on the given level)
  # - overwrite the VERSION

  if not "version" in config:
    print("Could not find 'version' in config of %s"%folder)
    return None

  version = SemanticVersion(config["version"]);
  version.increment(level)

  config["version"] = version.str();

  print ("Writing new version: %s"%(version.str()))
  write_json(module_file(folder), config)
Example No. 56
def get_save_map_info():
    min_lat = min_lon = MAX_NUM
    max_lat = max_lon = -MAX_NUM
    for item in Ways_dict.values():
        for node in item[u"nodes"]:
            tmp_lat = float(node[u"lat"])
            tmp_lon = float(node[u"lon"])
            min_lat = min(min_lat, tmp_lat)
            min_lon = min(min_lon, tmp_lon)
            max_lat = max(max_lat, tmp_lat)
            max_lon = max(max_lon, tmp_lon)
    num_lat = int(math.ceil((max_lat - min_lat) / STEP) + 0.1)
    num_lon = int(math.ceil((max_lon - min_lon) / STEP) + 0.1)
    num_grids = num_lat * num_lon
    map_info = min_lat, max_lat, min_lon, max_lon, num_lat, num_lon, num_grids
    util.write_json(MAP_INFO, INTER_DATA_DIR, map_info)
    return min_lat, max_lat, min_lon, max_lon, num_lat, num_lon, num_grids
Example No. 57
 def shrinkwrap(self, args=None):
   print("Creating npm-shrinkwrap.json...")
   npm_shrinkwrap(self.root_dir)
   shrinkwrap_file = os.path.join(self.root_dir, "npm-shrinkwrap.json")
   shrinkwrap_conf = read_json(shrinkwrap_file)
   project_config = self.get_project_config()
   deps = shrinkwrap_conf['dependencies']
   devDeps = shrinkwrap_conf['devDependencies'] if 'devDependencies' in shrinkwrap_conf else {}
   for m, __, conf in iterate_modules(self.root_dir, project_config):
     name = conf['name']
     repo = "git+%s#%s"%(m['repository'],m['branch'])
     if name in deps:
       entry = deps[name]
       entry['from'] = repo
     if name in devDeps:
        entry = devDeps[name]
       entry['from'] = repo
   write_json(shrinkwrap_file, shrinkwrap_conf)
def make_dataset(t1, t2, out_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # we need to map the ids in the yelp data to ints since snap only allows ints as node ids
    id_to_nid = KeyToInt()

    print "Building set of nodes..."
    nids = set()
    for review in reviews_iterator():
        if get_date(review) < t1:
            nids.add(id_to_nid['u' + review['user_id']])
            nids.add(id_to_nid['b' + review['business_id']])

    print "Building user data..."
    write_node_data(lambda user_data: id_to_nid['u' + user_data['user_id']], nids,
                    './data/provided/yelp_academic_dataset_user.json',
                    out_dir + 'user.json')

    print "Building business data..."
    write_node_data(lambda business_data: id_to_nid['b' + business_data['business_id']], nids,
                    './data/provided/yelp_academic_dataset_business.json',
                    out_dir + 'business.json')

    print "Building graph..."
    with open(out_dir + 'graph.txt', 'w') as graph, \
            open(out_dir + 'new_edges.txt', 'w') as new_edges:
        review_data = defaultdict(lambda: defaultdict(list))
        for review in reviews_iterator():
            user_key = id_to_nid['u' + review['user_id']]
            business_key = id_to_nid['b' + review['business_id']]
            if user_key in nids and business_key in nids:
                date = get_date(review)
                if date < t1:
                    review_data[user_key][business_key].append(review)
                    graph.write("{:} {:}\n".format(user_key, business_key))
                elif date < t2:
                    new_edges.write("{:} {:}\n".format(user_key, business_key))

        for u in review_data:
            for b in review_data[u]:
                review_data[u][b] = sorted(review_data[u][b], key=get_date, reverse=True)

        util.write_json(review_data, out_dir + "review.json")