Example #1
def get_dist_by_city_state(city1=None, state1=None, city2=None, state2=None):
    if state1.lower() == state2.lower() and city1.lower() == city2.lower():
        return 0
    search = SearchEngine()
    int_zip1_list = sorted([
        x.zipcode for x in search.by_city_and_state(city=city1, state=state1)
    ])
    int_zip2_list = sorted([
        x.zipcode for x in search.by_city_and_state(city=city2, state=state2)
    ])
    # use the first (lowest-numbered) ZIP code from each sorted list
    # print('using zip {} and {}'.format(int_zip1_list[0], int_zip2_list[0]))
    dist = get_dist_by_zip(int_zip1_list[0], int_zip2_list[0])
    return dist
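Example #1 calls a get_dist_by_zip helper that is not shown. A minimal sketch of what such a helper might look like, assuming the same uszipcode package and a plain haversine great-circle distance (this helper is hypothetical, not the original author's code):

from math import asin, cos, radians, sin, sqrt

from uszipcode import SearchEngine


def get_dist_by_zip(zip1, zip2):
    # Hypothetical helper: look up both ZIP codes and return the
    # great-circle (haversine) distance between their centroids, in miles.
    search = SearchEngine()
    z1 = search.by_zipcode(zip1)
    z2 = search.by_zipcode(zip2)
    lat1, lng1, lat2, lng2 = map(radians, [z1.lat, z1.lng, z2.lat, z2.lng])
    a = sin((lat2 - lat1) / 2) ** 2 + \
        cos(lat1) * cos(lat2) * sin((lng2 - lng1) / 2) ** 2
    return 2 * 3956 * asin(sqrt(a))  # Earth radius of ~3956 miles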
Example #2
def getZipcode(city, state):
    search = SearchEngine()
    zipSearch = search.by_city_and_state(city, state)
    zipcode = zipSearch[0].zipcode

    return zipcode
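Note that by_city_and_state typically returns an empty list when nothing matches, so the [0] indexing above raises IndexError for an unknown city/state pair. A defensive variant (a sketch, not the original example):

def get_zipcode_or_none(city, state):
    # Return the first matching ZIP code, or None when the lookup
    # finds nothing for the given city/state pair.
    search = SearchEngine()
    results = search.by_city_and_state(city, state)
    return results[0].zipcode if results else None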
Example #3
def main():
    state2 = input("What" + '\x1b[1;31;40m' + ' state ' + '\x1b[0m' +
                   "do you want the temperature of?\n")
    city2 = input("What" + '\x1b[1;31;40m' + ' city ' + '\x1b[0m' +
                  "do you want the temperature of?\n")
    search = SearchEngine(simple_zipcode=True)
    res = search.by_city_and_state(city2,
                                   state2,
                                   zipcode_type='Standard',
                                   sort_by='zipcode',
                                   ascending=True,
                                   returns=5)
    try:
        zipcode = res[0]
    except IndexError:
        print("Please type in a valid USA State/City\n")
        return main()
    city = zipcode.major_city
    state = zipcode.state
    urlend = zipcode.zipcode
    URL = 'https://weather.com/weather/today/l/' + urlend
    page = requests.get(URL)
    soup = BeautifulSoup(page.content, 'html.parser')
    # note: this CSS class name is tied to weather.com's current markup and
    # may change, in which case find() returns None
    temp = soup.find(class_='CurrentConditions--tempValue--3KcTQ').get_text()
    print('\nThe temperature right now in ' + city + ', ' + state + ' is ' +
          '\x1b[1;32;40m' + temp + '\x1b[0m' + "\n")
    main()
Example #4
def get_lat_lng_of_city_state(city, state):
    count = 0
    search = SearchEngine(simple_zipcode=True)
    info = []
    res = search.by_city_and_state(city=city, state=state)
    for z in res:
        if z.lat is not None and count == 0:
            info.append(z.lat)
            info.append(z.lng)
            count = count + 1
    return info
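A short usage sketch for the function above, assuming the uszipcode data set is available locally (the city and state are placeholders):

if __name__ == "__main__":
    # Expect [lat, lng] for the first match that has coordinates,
    # or an empty list if none of the returned ZIP codes do.
    coords = get_lat_lng_of_city_state("Chicago", "IL")
    print(coords)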
Example #5
def getZip(location_str, zipList):
    if location_str.isdigit():
        zipList.append(location_str)
    else:
        try:
            search = SearchEngine(simple_zipcode=True)
            city_state = location_str.split(', ')
            matches = search.by_city_and_state(city_state[0], city_state[1])
            zipList.append(matches[0].zipcode)
        except Exception:
            zipList.append("None")
Example #6
def fill_missing_arguments(args: dict) -> dict:
    search_engine = SearchEngine(db_file_dir="backend/tmp")
    if args['zipcode'] != 0:
        zipcode = search_engine.by_zipcode(args['zipcode'])
        location = [zipcode.lng, zipcode.lat]
        args['coordinates'] = location
    elif args['city'] != '*' and args['state'] != '*':
        zipcode = search_engine.by_city_and_state(args['city'], args['state'])[0]
        args['zipcode'] = zipcode.zipcode
        location = [zipcode.lng, zipcode.lat]
        args['coordinates'] = location
    return args
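A short usage sketch for fill_missing_arguments, assuming an args dict shaped the way the function expects (with 0 and '*' as the "unset" sentinels for zipcode and city/state) and a uszipcode database under backend/tmp; the values are placeholders:

# Hypothetical call: city and state are set but the zipcode is the
# 0 sentinel, so both 'zipcode' and 'coordinates' should be filled in.
args = {"zipcode": 0, "city": "Seattle", "state": "WA"}
args = fill_missing_arguments(args)
print(args["zipcode"], args["coordinates"])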
Example #7
def search():
    """
    This function takes city and state as an input, and returns
    the string value. Search() also will automatically execute
    the same logic found in lookup().
    """

    code = input("Enter [city,state]: ").split(',')
    a = SearchEngine(simple_zipcode=True)
    zipcode = a.by_city_and_state(code[0], code[1])[0]
    print(colored("Zipcode Information:", "green"))
    pprint(zipcode.to_json())
    target = " ".join(code)
    return target
Example #8
def get_zips_for_city(city, state):
    # keep only ZIP codes whose median household income is at or above the city's median
    search = SearchEngine(simple_zipcode=True)
    res = search.by_city_and_state(city, state, returns=0)
    print('# of ZIPs found: {}'.format(len(res)))
    df_zip = pd.DataFrame([{
        'zip': e.zipcode,
        'home_value': e.median_home_value,
        'income': e.median_household_income
    } for e in res])
    income_threshold = df_zip['income'].quantile(0.5)
    zips = df_zip.loc[df_zip['income'] >= income_threshold, 'zip'].values
    print('# of ZIPs with income at or above the median: {}'.format(
        len(zips)))
    return zips
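The quantile(0.5) threshold above is simply the median, so the same filter can be written with DataFrame.median(); a self-contained toy sketch with made-up numbers:

import pandas as pd

df = pd.DataFrame({
    "zip": ["00001", "00002", "00003"],
    "income": [40000, 55000, 70000],
})
median_income = df["income"].median()          # 55000 for this toy data
above_median = df.loc[df["income"] >= median_income, "zip"].values
print(above_median)                            # ['00002' '00003']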
Example #9
        country = dfN.loc[idx, 'country']
        if country not in focus:
            dfN.loc[idx, 'division'] = country

        # convert sets of states into subnational regions
        division = dfN.loc[idx, 'division']
        if division not in ['', 'unknown']:
            if division in geoLevels.keys():
                dfN.loc[idx, 'country'] = geoLevels[dfN.loc[idx, 'division']]

        # convert sets of cities into sub-state regions
        location = dfN.loc[idx, 'location']
        # print(location)
        if location not in ['', 'unknown'] and division == 'Connecticut':
            try:
                res = search.by_city_and_state(location, "CT")
                area_zip = res[0].zipcode
                if area_zip in geoLevels.keys():
                    dfN.loc[idx, 'location'] = geoLevels[area_zip]
                else:
                    print(row['location'] + ' has a zip code (' + area_zip +
                          ') not found in the geo-scheme.')
                    notfound.append(location)
            except Exception:
                notfound.append(location)
                dfN.loc[idx, 'location'] = ''

        # flatten location names as division names for divisions that are not a focus of study
        if division not in focus:
            dfN.loc[idx, 'location'] = division
        print('Processing metadata for... ' + row['strain'])
Example #10
def main():
    search_dir = 'twitter_geo_searches/'
    if not os.path.exists(os.path.dirname(search_dir)):
        os.makedirs(os.path.dirname(search_dir), 0o755)

    twpy_api = auth.get_access_creds()
    pool = multiprocessing.Pool(max(1, multiprocessing.cpu_count() - 1))

    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description=
        'Get twitter user ids and their follower ids from Tweepy and save in different formats'
    )
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search',
        help='Gather Twitter user ids and followers by city, state and radius')
    search_parser.add_argument(
        '-c',
        '--city',
        required=True,
        action='store',
        dest='city',
        help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-s',
        '--state',
        required=True,
        action='store',
        dest='state',
        help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-r',
        '--radius',
        required=True,
        action='store',
        dest='radius',
        help=
        'Radius to search Twitter API for user ids (miles or kilometers -- ex: 50mi or 50km). REQUIRED'
    )
    search_parser.add_argument(
        '-f',
        '--filename',
        required=True,
        action='store',
        dest='filename',
        help='Name of output file for networkx graph data. REQUIRED')

    netx_parser = subparsers.add_parser(
        'netx', help='Perform operations on already generated networkx graph')
    netx_parser.add_argument('-q',
                             '--clique',
                             action='store_true',
                             help='Find cliques with networkx')
    netx_parser.add_argument(
        '-x',
        '--clq_filename',
        action='store',
        help='Provide a filename for the serialized output of find_cliques')
    netx_parser.add_argument('-g',
                             '--graph_filename',
                             required=True,
                             action='store',
                             dest='graph_filename',
                             help='Networkx input data filename. REQUIRED')
    netx_parser.add_argument('-o',
                             '--out_filename',
                             required=True,
                             action='store',
                             dest='out_filename',
                             help='Networkx output data filename REQUIRED')
    netx_parser.add_argument('-k',
                             '--comm',
                             action='store_true',
                             help='Find communities with networkx')
    netx_parser.add_argument('-p',
                             '--print_graph',
                             action='store_true',
                             help='Print networkx graph')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if not args.mode:
        print('ERROR: No arguments provided. Use -h or --help for help')
        return

    if args.mode == 'search':
        city = args.city
        state = args.state
        search_radius = args.radius
        search_filename = args.filename + '.json'

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(city, state, returns=50)

        user_ids = []
        user_followers = []
        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes),
                                  track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id=str(zipcode.zipcode) + '\t')
            latitude = zipcode.lat
            longitude = zipcode.lng
            user_ids.extend(
                get_user_ids(twpy_api, latitude, longitude, search_radius))

        n = 2
        # gets the followers of all the retrieved user ids n number of depths
        for i in range(0, n):
            user_ids, user_followers = get_user_followers(
                twpy_api, set(user_ids))

        filename = os.path.join(search_dir, search_filename)
        save_user_follower_networkx_graph(user_followers, filename)

    if args.mode == 'netx':
        graph_filename = os.path.join(search_dir,
                                      args.graph_filename + '.json')
        output_filename = os.path.join(search_dir, args.out_filename + '.json')
        graph = open_nx_graph(graph_filename)
        cliques = []

        if args.clique:
            for clique in pool.map(gather_cliques, nx.find_cliques(graph)):
                cliques.append([int(member) for member in clique])

            with open(output_filename, 'w') as output:
                for clique in cliques:
                    output.write('%s,\n' % (clique))

        elif args.comm:
            if args.clq_filename:
                clique_filename = os.path.join(search_dir,
                                               args.clq_filename + '.json')
                # load the clique topology file
                with open(clique_filename, 'r') as find_cliques_file:
                    cliques = [
                        clique for cliques in find_cliques_file
                        for clique in ast.literal_eval(cliques)
                    ]

            with open(output_filename, "w") as output:
                for node in pool.map(gather_cliques,
                                     community.girvan_newman(graph)):
                    print(node)
                    #output.write(str([int(item) for item in node]) + ', \n')
        elif args.print_graph:
            nx.draw(graph)
            plt.show()

    print("Job complete")
Example #11
def main():
    # set up the command line arguments
    parser = argparse.ArgumentParser(
        description=
        'Get twitter user ids and their follower ids using Tweepy and save in different formats'
    )
    subparsers = parser.add_subparsers(dest='mode')

    search_parser = subparsers.add_parser(
        'search', help='Gather Twitter user ids by city, state and radius')
    search_parser.add_argument(
        '-c',
        '--city',
        required=True,
        action='store',
        dest='city',
        help='City to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-s',
        '--state',
        required=True,
        action='store',
        dest='state',
        help='State to search for Twitter user ids. REQUIRED')
    search_parser.add_argument(
        '-r',
        '--radius',
        required=True,
        action='store',
        dest='radius',
        help=
        'Radius to search Twitter API for user ids (miles or kilometers -- ex: 50mi or 50km). REQUIRED'
    )
    search_parser.add_argument(
        '-d',
        '--depth',
        required=True,
        action='store',
        dest='depth',
        help=
        'This value represents how far to traverse into user follower relationships when gathering users. REQUIRED'
    )
    search_parser.add_argument(
        '-f',
        '--filename',
        required=True,
        action='store',
        dest='filename',
        help='Name of output file to store gathered users in. REQUIRED')
    search_parser.add_argument(
        '-z',
        '--creds',
        required=True,
        action='store',
        dest='creds',
        help='Path to Twitter developer access credentials REQUIRED')

    continue_parser = subparsers.add_parser(
        'getfws',
        help=
        'Takes in already gathered jsonified list of users and retrieves their followers'
    )
    continue_parser.add_argument(
        '-f',
        '--filename',
        action='store',
        help=
        'Filename of the previously saved Twitter users ids in .json format')
    continue_parser.add_argument(
        '-d',
        '--depth',
        required=True,
        action='store',
        dest='depth',
        help=
        'This value represents how far to traverse into user follower relationships when searching for followers. REQUIRED'
    )
    continue_parser.add_argument(
        '-z',
        '--creds',
        required=True,
        action='store',
        dest='creds',
        help='Path to Twitter developer access credentials REQUIRED')

    convert_parser = subparsers.add_parser(
        'convert',
        help=
        'Convert user followers dict to users list and save file. This is the file format used when continuing the get followers function and in get_community_tweets.py'
    )
    convert_parser.add_argument(
        '-i',
        '--input_file',
        action='store',
        help='Filename of the previously saved followers dictionary')
    convert_parser.add_argument(
        '-o',
        '--out_file',
        action='store',
        help=
        'Filename to store the output. Just the filename no path is needed. The output file will be saved in the folder of the input file'
    )

    netx_parser = subparsers.add_parser(
        'netx', help='Create cliques or communities from user follower data')
    group = netx_parser.add_mutually_exclusive_group(required=True)
    group.add_argument('-q',
                       '--gen_cliques',
                       required=False,
                       action='store_true',
                       dest='gen_cliques',
                       help='Generate cliques from user followers dictionary')
    group.add_argument(
        '-c',
        '--gen_comms',
        required=False,
        action='store_true',
        dest='gen_comms',
        help='Generate communities from user followers dictionary')
    netx_parser.add_argument(
        '-n',
        '--min_size',
        action='store',
        dest='min_size',
        nargs='?',
        type=int,
        const=1,
        default=4,
        help='Constraint for min size of clique or community (default is 4)')
    netx_parser.add_argument('-i',
                             '--in_filename',
                             required=True,
                             action='store',
                             dest='in_filename',
                             help='User followers dictionary file REQUIRED')
    netx_parser.add_argument('-o',
                             '--out_filename',
                             required=True,
                             action='store',
                             dest='out_filename',
                             help='Output topology filename REQUIRED')

    argcomplete.autocomplete(parser)
    args = parser.parse_args()

    if args.mode == 'convert':
        working_dir = get_directory_of_file(args.input_file)
        convert_followers_to_users(args.input_file, args.out_file, working_dir)

    if args.mode == 'getfws':
        twpy_api = auth.get_access_creds(args.creds)

        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return

        working_dir = get_directory_of_file(args.filename)

        user_ids = read_json(args.filename)
        if not user_ids:
            print('Error: No users found in provided file')
            return

        # gets the followers of all the retrieved user ids 'depth' number of times
        collect_user_followers(args.depth, twpy_api, working_dir,
                               args.filename, user_ids)

    if args.mode == 'search':
        twpy_api = auth.get_access_creds(args.creds)

        if not twpy_api:
            print('Error: Twitter developer access credentials denied')
            return

        working_dir = get_directory_of_file(args.filename)

        # gets the first 50 zip codes by city and state
        zip_search = SearchEngine()
        zipcodes = zip_search.by_city_and_state(args.city,
                                                args.state,
                                                returns=50)

        user_ids = []
        user_followers = []
        # gets the user ids at each geo-location for the retrieved zip codes
        bar = pyprind.ProgPercent(len(zipcodes),
                                  track_time=True,
                                  title='Finding user ids')
        for zipcode in zipcodes:
            bar.update(item_id='zip code:' + str(zipcode.zipcode) + '\t')
            user_ids.extend(
                get_user_ids(twpy_api, zipcode.lat, zipcode.lng, args.radius))
            write_json(args.filename, list(set(user_ids)))

    if args.mode == 'netx':
        user_followers = read_json(args.in_filename)
        pythonify_dict(user_followers)
        print("Number of followers: " + str(len(user_followers)))
        output_filename = args.out_filename + '.json'
        graph = build_netx_graph(user_followers)

        if args.gen_cliques:
            generate_cliques(graph, output_filename, args.min_size)
        if args.gen_comms:
            generate_communities(graph, output_filename, args.min_size)
Example #12
def citystate(field):

    stmt = db.session.query(City_zip).statement
    df_census = pd.read_sql_query(stmt, db.session.bind)

    from keras.models import load_model
    from uszipcode import SearchEngine
    search = SearchEngine(simple_zipcode=True)
    print(field)
    city = field.split('-')[0]
    state = field.split('-')[1]
    res = search.by_city_and_state(city, state, returns=30)
    total = len(res)
    lat = res[0].lat
    lng = res[0].lng

    # print(lat)
    print(total)

    import random

    if total <= 10:
        count = []
        for x in range(total):
            count.append(int(x))
    else:
        count = random.sample(range(0, total), (total-1))
        # count = 5

    print(count)

    zipArry = []
    ziplat = []
    ziplng = []

    for x in count:
        print(x)
        item = res[x]
        print(item)
        if item.lat is None:
            continue
        else:
            zipArry.append(item.zipcode)
            ziplat.append(float(item.lat))
            ziplng.append(float(item.lng))

    df_test = pd.DataFrame({'Zipcode': zipArry, 'Latitude': ziplat, 'Longitude': ziplng})

    print(df_test)

    merge_table = pd.merge(df_census, df_test, on="Zipcode", how='inner')
    merge_table = merge_table[(merge_table != 0).all(1)]
    merge_table = merge_table[(merge_table != '').all(1)]
    merge_table = merge_table.dropna()
    
    print(merge_table)

    #["MedianAge", "HouseholdIncome", "PerCapitaIncome", "PovertyRate"]

    from joblib import dump, load
    loaded_model = load('classifier.joblib') 

    # print("loading neural model")
    # loaded_model = load_model("neural.h5")
    # print("model loaded")
    merge_table['prediction'] = ""

    print("starting for loop")
    for index,row in merge_table.iterrows():
        #for logistic regression
        input_data = [float(row['MedianAge']), float(row['HouseholdIncome']),\
            float(row['PerCapitaIncome']), float(row['PovertyRate'])]
        #for neural network
        a = np.array([input_data])
        print(a)
        # for logistic regression
        result = loaded_model.predict([input_data])[0]
        merge_table.at[index, 'prediction'] = result
        # for neural network model
        # result = loaded_model.predict_classes(a)
        # if result[0] == 2:
        #     final = 'high'
        # elif result[0] == 1:
        #     final = 'medium'
        # else:
        #     final = 'low'
        # merge_table.at[index, 'prediction'] = final

    print(merge_table)

    ###########################
    data = {
        "total_results": len(count),
        "latitude": merge_table.Latitude.tolist(),
        "longitude": merge_table.Longitude.tolist(),
        "zipcode": merge_table.Zipcode.tolist(),
        "MedianAge": merge_table.MedianAge.tolist(),
        "HouseholdIncome": merge_table.HouseholdIncome.tolist(),
        "PerCapitaIncome": merge_table.PerCapitaIncome.tolist(),
        "PovertyRate": merge_table.PovertyRate.tolist(),
        "Predictions": merge_table.prediction.tolist()
        # "zipcode": zipArry

    }

    return jsonify(data)
Example #13
    baltimore_county_map, # lc1
    richmond_city_map, # lc2
    henrico_county_map, # lc2
    phoenix_city_map, # lc3
    maricopa_county_map, # lc3
    houston_city_map, # lc4
    harris_county_map # lc4
]

### Get all Zips

all_zip = []

# search for all the zips and add them to a list
for cs in city_state:
    temp1 = search.by_city_and_state(cs[0], cs[1])
    for t in temp1:
        tempL = [cs[0]]
        tempL.append(t.zipcode)
        if len(t.common_city_list) != 0:
            tempL.append(t.common_city_list[-1])
        else:
            tempL.append(cs[0]+'?')
        tempL.append(t.lat)
        tempL.append(t.lng)
        all_zip.append(tempL)

# format for all_zip [[city, zip, common city, lat, long]]

# replace missing (None) coordinates with the previous entry's values
prevE = []
Example #14
def deal_location(deal):
    offer_location = deal['Location'].tolist()
    search = SearchEngine(simple_zipcode=True)

    city_list = []
    state_list = []
    zipcode_list = []
    for location in offer_location:
        try:
            city = location.split(',')[0].lstrip().lower().replace('msa', '')
            state = location.split(',')[1].lstrip().lower().replace('msa', '')
            zipcode = search.by_city_and_state(city,
                                               state,
                                               sort_by=Zipcode.population,
                                               ascending=False,
                                               returns=1)[0].zipcode
        except (ValueError, IndexError, AttributeError):
            city = 'nan'
            state = 'nan'
            zipcode = 'nan'
        city_list.append(city)
        state_list.append(state)
        zipcode_list.append(zipcode)

    deal['zipcode'] = pd.Series(zipcode_list)
    deal = deal[deal['zipcode'] != 'nan']
    deal_totalcat = pd.DataFrame(deal.groupby(
        ['zipcode'])['TotalCAT'].sum()).reset_index().drop_duplicates()
    deal_totalcmt = pd.DataFrame(
        deal.groupby([
            'zipcode'
        ])['Totalactivitys'].sum()).reset_index().drop_duplicates()
    off_total = deal_totalcat.merge(deal_totalcmt, how='left', on=['zipcode'])
    off_zipcode = off_total[off_total['TotalCAT'] > 0].reset_index(drop=True)

    off_lat = []
    off_lon = []
    for ele in off_zipcode['zipcode']:
        ele_search = search.by_zipcode(ele)
        off_lat.append(ele_search.lat)
        off_lon.append(ele_search.lng)

    off_zipcode['deal_lat'] = pd.Series(off_lat)
    off_zipcode['deal_lon'] = pd.Series(off_lon)
    off_zipcode = off_zipcode.dropna().reset_index(drop=True)

    lat = off_zipcode['deal_lat'].values
    lon = off_zipcode['deal_lon'].values
    total_activity = off_zipcode['Totalactivitys'].values
    total_CAT = off_zipcode['TotalCAT'].values

    # 1. Draw the map background
    fig = plt.figure(figsize=(30, 30))
    m = Basemap(projection='lcc',
                resolution='h',
                lat_0=37.09,
                lon_0=-95.71,
                width=6E6,
                height=4E6)
    m.shadedrelief()
    m.drawcoastlines(color='gray')
    m.drawcountries(color='gray')
    m.drawstates(color='gray')

    # 2. scatter city data, with color reflecting population
    # and size reflecting area
    m.scatter(lon,
              lat,
              latlon=True,
              s=total_CAT,
              c=total_activity,
              cmap='Blues',
              alpha=1)

    # 3. create colorbar and legend
    cbar = plt.colorbar(label=r'Number of Total Activity in deal')
    cbar.ax.tick_params(labelsize=20)
    #plt.clim(30, 70)

    # make legend with dummy points
    for a in [400, 1000, 3000]:
        plt.scatter([], [],
                    c='blue',
                    alpha=1,
                    s=a,
                    label=str(a) +
                    ' Thousand $ in Total activity Amount in deals')
    plt.legend(title='Size of the dot indicates...',
               scatterpoints=1,
               frameon=False,
               labelspacing=1,
               loc='lower left')
    plt.title('deal Location in the USA', fontsize=20)
    plt.tight_layout()
    plt.savefig('deal location in the USA.png')
    plt.show()
    return off_zipcode