Example #1
def main():
    """Search the Facebook Graph API for open groups matching each
    municipality and write the matches to a TSV file.

    Reads municipality names from ``--location_file``, queries the Graph
    API for groups named ``"<search_query> <municipality>"``, keeps only
    groups whose privacy is ``'OPEN'``, and writes rows of
    (location_name, group_id, group_name) to
    ``<out_dir>/location_group_data.tsv``.
    """
    parser = ArgumentParser()
    parser.add_argument(
        '--location_file',
        default='../../data/facebook-maria/PR_municipalities.tsv')
    parser.add_argument('--search_query', default='Huracan Maria')
    parser.add_argument('--out_dir', default='../../data/facebook-maria/')
    args = parser.parse_args()
    location_file = args.location_file
    search_query = args.search_query
    out_dir = args.out_dir

    ## load location names
    location_data = pd.read_csv(location_file, sep='\t', index_col=None)
    location_names = location_data.loc[:, 'Municipality'].values.tolist()

    ## connect to API
    app_id, app_secret, access_token = load_facebook_auth()
    graph = GraphAPI(access_token=access_token, version='2.10')

    ## search for groups by location name
    # Collected rows; renamed from `location_data` so the DataFrame of
    # municipalities loaded above is no longer shadowed.
    group_rows = []
    loc_index = ['location_name', 'group_id', 'group_name']
    for location_name in location_names:
        print('processing location %s' % (location_name))
        location_search_name = '%s %s' % (search_query, location_name)
        groups = graph.search(type='group', q=location_search_name)['data']
        # BUG FIX: filter() returns a lazy iterator in Python 3, so the
        # original len(filter(...)) raised TypeError. A list comprehension
        # behaves identically under both Python 2 and 3.
        open_groups = [g for g in groups if g['privacy'] == 'OPEN']
        if open_groups:
            group_rows += [[location_name, g['id'], g['name']]
                           for g in open_groups]
    group_data = pd.DataFrame(group_rows, columns=loc_index)

    ## write to file
    out_file = os.path.join(out_dir, 'location_group_data.tsv')
    group_data.to_csv(out_file, sep='\t', index=False, encoding='utf-8')
Example #2
File: fb_coverage.py — Project: rnomadic/ETL
# Fetch artist names via MADE_ARTISTS_QUERY; `curs` is a DB cursor opened
# elsewhere in the file.
curs.execute(MADE_ARTISTS_QUERY)

artists = curs.fetchall()

# Track which artists matched a Facebook page and which did not.
# NOTE(review): neither set is updated in this visible chunk — presumably
# populated further down; confirm against the rest of the file.
found = set()
missed = set()

for a in artists:
    # Each fetched row is a tuple; unwrap the first column (the name).
    a = a[0]
    # Skip artists already recorded in the cache under "<cache_key>:::<name>".
    if cache.get(':::'.join([cache_key, a])):
        continue

    # Retry budget consumed by the while loop at the end of this chunk.
    try_count = 5
    try:
        # First attempt: bias the page search toward official artist pages.
        res = graph.search(type='page', q=' '.join([a, 'official']))
    except GraphAPIError as e:
        print('1', e)
        res = None

    if res:
        data = res['data']
        if len(data) == 0:
            # No hits for "<artist> official" — retry with the bare name.
            try:
                res = graph.search(type='page', q=a)
                data = res['data']
            except GraphAPIError as e:
                print('2', e)
                continue
        has_insights = False
        # Examine candidate pages; the loop body continues beyond this chunk.
        while try_count > 0 and len(data) > 0: