Example #1
def check_users_with_top1000_repo(users=None, repos=None):
    if users is None:
        users = find_all('users')
    if repos is None:
        top1000_repo_count = {
            repo['name']: 0
            for repo in find_all('top1000_repos')
        }
        repos_count = top1000_repo_count
    else:
        repos_count = {repo: 0 for repo in repos}
    for i, user in enumerate(users):
        print(f"{i}/{len(users)} checking user {user['name']}")
        if len(user) == 1:
            # skip users whose starred repos have not been crawled yet
            print("not crawled yet, skipping")
            continue
        for repo in user['repos']:
            if repo in repos_count:
                repos_count[repo] += 1
    repos_count = {
        k: v
        for k, v in sorted(
            repos_count.items(), key=lambda item: item[1], reverse=True)
    }

    for repo in repos_count:
        print(f"{repo}: {repos_count[repo]}")
    #print(repos_count)
    return repos_count
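Every example on this page calls a project-level find_all helper rather than a standard library function. A minimal sketch of what such a helper might look like, assuming a PyMongo backend (the connection details, database name, and exact signature are guesses inferred from the call sites above):

from pymongo import MongoClient

client = MongoClient('localhost', 27017)  # assumed connection details
db = client['github']                     # assumed database name

def find_all(col_name, target=None, field_filter=None):
    # Return every document in col_name matching target (all documents
    # when target is None), projected through the optional field_filter.
    return list(db[col_name].find(target or {}, field_filter))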
Example #2
def predict(username):
    '''
    Return the top 10 predicted repos for the user.
    '''
    repos = get_user_starred_repo(username)
    positive_set, negative_set = DirectParseData(repo2idx(repos), 838)
    predict_repos = idx2repo(
        GetTopKRecommend(
            path='./predict/weight/model_MAP_0.10040_np_20_epoch_32_d_64',
            all_u=1168,
            all_i=838,
            dim=64,
            epoch=256,
            ratio=20,
            top_k=10,
            positive_set=positive_set,
            negative_set=negative_set))
    predict_detail = find_all(col_name='top1000_repos_detail',
                              target={"full_name": {
                                  "$in": predict_repos
                              }},
                              field_filter={
                                  '_id': 0,
                                  'full_name': 1,
                                  'description': 1,
                                  'language': 1,
                                  'stargazers_count': 1
                              })
    return predict_detail
Example #3
from concurrent.futures import ThreadPoolExecutor

def concurrent_crawl_repo_of_users(max_workers, mode='token'):
    users = find_all('users')
    # keep only users whose starred repos have not been crawled yet
    users = [user for user in users if len(user) == 1]
    print(f'users to crawl: {len(users)}...')
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for i, user in enumerate(users):
            #print(f'crawling {i+1}/{len(users)}...')
            executor.submit(insert_user_starred_repo, user, mode)
Example #4
def crawl_top_users():
    users = find_all('users')
    users = [user for user in users if len(user) == 1]
    for i, user in enumerate(users):
        print(f'crawling {i+1}/{len(users)}...')
        repos = get_user_starred_repo(user['name'])
        print(f"finish {user['name']} total: {len(repos)} update to db")
        update_one('users', {'name': user['name']}, {'repos': repos})
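get_user_starred_repo is not shown on this page. A plausible sketch against GitHub's REST API, returning the starred repos' full names (the pagination approach and the lack of authentication are assumptions):

import requests

def get_user_starred_repo(username):
    # Page through the user's starred repositories, newest first,
    # collecting each repo's full name ('owner/repo').
    repos, page = [], 1
    while True:
        r = requests.get(f'https://api.github.com/users/{username}/starred',
                         params={'per_page': 100, 'page': page})
        r.raise_for_status()
        batch = r.json()
        if not batch:
            return repos
        repos.extend(repo['full_name'] for repo in batch)
        page += 1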
Example #5
def init_users_to_db(users):
    #datas = [{'name': user} for user in users]
    users_in_db = find_all('users', field_filter={'_id': 0, 'repos': 0})
    users_in_db = [user['name'] for user in users_in_db]
    new_users = [user for user in users if user not in users_in_db]
    print(
        f"{len(users) - len(new_users)} users already in db, insert {len(new_users)} users"
    )
    if len(new_users) > 0:
        insert_many('users', [{'name': user} for user in new_users])
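insert_many and update_one (used here and in Example #4) would be thin wrappers in the same style as the find_all sketch above; a hedged version, reusing the assumed db handle:

def insert_many(col_name, docs):
    # Insert a list of documents into the named collection.
    db[col_name].insert_many(docs)

def update_one(col_name, target, new_values):
    # Set the given fields on the first document matching target.
    db[col_name].update_one(target, {'$set': new_values})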
Example #6
from concurrent.futures import ThreadPoolExecutor

def concurrent_crawl_top1000_repo_details(max_workers, mode='token'):
    repos = find_all('top1000_repos')
    exist_repos = find_all('top1000_repos_detail',
                           field_filter={
                               '_id': 0,
                               'full_name': 1
                           })
    # lowercase set for fast, case-insensitive membership tests
    exist_repo_names = {
        exist_repo['full_name'].lower() for exist_repo in exist_repos
    }
    repo_names = [
        repo['name'] for repo in repos
        if repo['name'].lower() not in exist_repo_names
    ]
    print(f'repos to crawl: {len(repo_names)}')
    #print(repo_names)
    #exit()
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        for i, repo_name in enumerate(repo_names):
            #print(f'crawling {i+1}/{len(users)}...')
            executor.submit(insert_repo_detail, repo_name, mode)
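insert_repo_detail is also not shown. A sketch that fetches repository metadata from GitHub's REST API and stores it, assuming the same db handle as above; GITHUB_TOKEN and the exact meaning of mode='token' are guesses:

import requests

def insert_repo_detail(repo_name, mode='token'):
    # Fetch the repo's metadata (full_name, description, language,
    # stargazers_count, ...) and store the raw document.
    headers = {'Authorization': f'token {GITHUB_TOKEN}'} if mode == 'token' else {}
    r = requests.get(f'https://api.github.com/repos/{repo_name}', headers=headers)
    r.raise_for_status()
    db['top1000_repos_detail'].insert_one(r.json())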
Example #7
def check_user_star_in_repos(users, repos=None):
    if repos is None:
        repos = [repo['name'] for repo in find_all('top1000_repos')]
    # keep only users whose starred repos have been crawled
    users = [user for user in users if len(user) == 2]
    for user in users:
        user['repos'] = [repo for repo in user['repos'] if repo in repos]
    users.sort(key=lambda item: len(item['repos']), reverse=True)
    for user in users:
        print(f"{user['name']} : {len(user['repos'])}")
Example #8
import pandas as pd

def generate_dataset(user_threshold=20, repos_threshold=50):
    users = find_all('users')
    top1000_repos = [repo['name'] for repo in find_all('top1000_repos')]
    users = filter_users(users, top1000_repos, user_threshold)
    repos = filter_repos(users, top1000_repos, repos_threshold)
    users = filter_users(users, repos, user_threshold)
    # reverse each user's starred repos into chronological order (earliest first)
    for user in users:
        user['repos'].reverse()
    print(
        f'users > {user_threshold}: {len(users)}, repos > {repos_threshold}: {len(repos)}'
    )
    item_index = list(range(len(repos)))
    user_index = list(range(len(users)))
    item2id = dict(zip(repos, item_index))
    user2id = {user['name']: idx for user, idx in zip(users, user_index)}
    dataset = [{
        'UserId': user2id[user['name']],
        # space-separated item ids, ordered from earliest to latest star
        'ItemId': ' '.join(str(item2id[repo]) for repo in user['repos'])
    } for user in users]
    df = pd.DataFrame(dataset)
    df.to_csv('dataset.csv', index=False)

    df = pd.DataFrame({'repo': repos, 'idx': item_index})
    df.to_csv('repo2idx.csv', index=False)

    df = pd.DataFrame({'user': [user['name'] for user in users],
                       'idx': user_index})
    df.to_csv('user2idx.csv', index=False)
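Example #2's repo2idx and idx2repo helpers plausibly read the mapping that generate_dataset writes to repo2idx.csv; a hedged sketch:

import pandas as pd

def repo2idx(repos):
    # Map repo full names to their dataset indices.
    mapping = pd.read_csv('repo2idx.csv').set_index('repo')['idx']
    return [int(mapping[repo]) for repo in repos if repo in mapping.index]

def idx2repo(indices):
    # Map dataset indices back to repo full names.
    mapping = pd.read_csv('repo2idx.csv').set_index('idx')['repo']
    return [mapping[idx] for idx in indices]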
Example #9
import pandas as pd
from random import sample

def random_get_repos(n=10):
    df = pd.read_csv('repo2idx.csv')
    items = list(df['repo'])
    return find_all(col_name='top1000_repos_detail',
                    target={"full_name": {
                        "$in": sample(items, n)
                    }},
                    field_filter={
                        '_id': 0,
                        'full_name': 1,
                        'description': 1,
                        'language': 1,
                        'stargazers_count': 1
                    })
Example #10
from collections import Counter

import Pycluster

def k_means():
    dataList = list(database.find_all())
    uuids = []
    counters = []
    for data in dataList:
        uuids.append(data['uuid'])
        # count how many times each event type occurs for this record
        counters.append(Counter(event['name'] for event in data['events']))
    # build fixed-length vectors over the union of event names so every row
    # has the same dimensions (kcluster needs a rectangular matrix)
    event_names = sorted({name for counter in counters for name in counter})
    vectors = [[counter[name] for name in event_names] for counter in counters]
    labels, error, nfound = Pycluster.kcluster(vectors, 3)
    # map each uuid to its cluster id as a plain Python int
    return dict(zip(uuids, (int(label) for label in labels)))
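A brief usage note: the function returns a plain dict, so a caller can iterate over it directly (the uuid keys and the choice of three clusters come from the code above):

clusters = k_means()
for uuid, label in clusters.items():
    print(f'{uuid} -> cluster {label}')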
Example #11
def main():

    # AI module that predicts BP (sys/dia), oxygen, and pulse; it is
    # trained once the historical data has been appended below
    p = predictor()

    # get new data of BP1, BP2, Oxy, Pulse from DB
    # latestData = database.find()

    # call the Display module to format the HTML

    # parse DB rows into a graphable format
    timeData = []
    pulseData = []
    bpSysData = []
    bpDiaData = []
    oxygenData = []

    for i in database.find_all():
        p.appendData(i)
        x_val = i['createAt']
        pulse_val = i['pulse']
        bp_sys_val = i['bloodPreSys']
        bp_dia_val = i['bloodPreDia']
        oxy_val = i['bloodOx']
        timeData.append(x_val)
        pulseData.append(pulse_val)
        bpSysData.append(bp_sys_val)
        bpDiaData.append(bp_dia_val)
        oxygenData.append(oxy_val)

    p.train()
    predictedData = p.predict(5000)  # 5000 is the time offset into the future

    alert = {'bloodPressureSys': globals.bp_sys_flag,
             'bloodPressureDia': globals.bp_dia_flag,
             'pulse': globals.pulse_flag,
             'oxygenContent': globals.oxygen_flag}
    data = {'time': timeData,
            'pulse': pulseData,
            'bloodPressureSys': bpSysData,
            'bloodPressureDia': bpDiaData,
            'oxygenContent': oxygenData}
    AIdata = {'AIpulse': predictedData[0],
              'AIbloodPressure': predictedData[1],
              'AIoxygenContent': predictedData[2]}
    return render_template('display.html', title='Display', alert=alert, data=data, AIdata=AIdata)
Example #12
def get(self):
    dataList = list(database.find_all())
    return json_dump(dataList)
Example #13
def get(self):
    rows = find_all()
    response = HTTPResponse()
    # assumes find_all() returns data already serialized for the response
    response.write(rows)
    return response
Example #14
def __handle_get_all(reply):
    reply["type"] = "SUCCESS"
    reply["data"] = database.find_all()
Example #15
def on_get(self, req, resp):
    resp.status = falcon.HTTP_200
    rows = find_all()
    # assumes find_all() returns a payload falcon can use as the body
    resp.body = rows
Example #16
def find():
    rows = find_all()
    resp = Response(None, status=200, mimetype='application/json')
    # assumes find_all() returns a JSON string; a serializing variant follows
    resp.data = rows
    return resp
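The last few handlers hand find_all's result straight to the response body, which only works if the rows arrive already serialized. A variant that serializes explicitly (json.dumps and flask.Response are standard; find_all remains the project helper assumed throughout, and default=str is a common way to cope with ObjectId or datetime values):

import json
from flask import Response

def find():
    rows = find_all()
    payload = json.dumps(rows, default=str)
    return Response(payload, status=200, mimetype='application/json')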