Exemple #1
0
def get_TA125_by_dates(start_date, end_date):
    """Download bizportal historical rates for an inclusive date range.

    Presumably the paper queried (id 33333333) is the TA-125 index, going by
    the function name. The URL is built from the two dates; headers and
    cookies are replayed from the second entry of the recorded HAR capture
    'HARS/bizportal_HAR.json'.

    Args:
        start_date: First day of the range. Must expose integer ``year``,
            ``month`` and ``day`` attributes and support subtraction
            (e.g. ``datetime.date``).
        end_date: Last day of the range (inclusive), same type as
            ``start_date``.

    Returns:
        The ``'Data'`` member of the JSON reply when its ``'Errors'`` member
        is ``None``; ``None`` otherwise.
    """
    # The page size equals the number of days in the range and only page 1
    # is requested, so everything is expected to arrive in a single reply.
    days_count = (end_date - start_date).days + 1
    manual_requests = get_requests_from_HAR_file('HARS/bizportal_HAR.json')
    recorded_request = manual_requests[1]['request']

    # Day and month are zero-padded by the format spec itself.
    url = "http://www.bizportal.co.il/Quote/Transactions/HistoricalRates_AjaxBinding_Read/33333333?startD={s_day:02d}%2F{s_mon:02d}%2F{s_year}&endD={e_day:02d}%2F{e_mon:02d}%2F{e_year}&take={page_size}&skip=0&page=1&pageSize={page_size}&sort%5B0%5D%5Bfield%5D=DealDate&sort%5B0%5D%5Bdir%5D=desc".format(
        s_year=start_date.year,
        s_mon=start_date.month,
        s_day=start_date.day,
        e_year=end_date.year,
        e_mon=end_date.month,
        e_day=end_date.day,
        page_size=days_count)

    response = requests.get(url=url,
                            headers=HAR_to_dict(recorded_request['headers']),
                            cookies=HAR_to_dict(recorded_request['cookies']),
                            verify=False)

    # The payload is parsed as-is: the former replace('"', '\"') step swapped
    # a double quote for an identical double quote (and cannot even run on a
    # bytes payload), so it never altered the content.
    results = json.loads(response.content)
    if results['Errors'] is None:
        return results['Data']
    return None
Exemple #2
0
def get_weather_monthly_data(month, year):
    """Scrape one month of daily weather rows for airport LLBG.

    The wunderground MonthlyHistory page is fetched with the headers and
    cookies of the first recorded request in the module-level
    ``manual_requests``; the request details are stored in the module-level
    ``my_request`` dict.

    Args:
        month: Month number; compared against the month abbreviation shown
            on the page.
        year: Year number; compared against the year shown on the page.

    Returns:
        A dict keyed by "year/month/day" whose values map each name in
        ``fields`` to the text of the matching table cell, or ``None`` (after
        printing an error) when the page shows a different month or year.
    """
    from time import strptime

    my_request['url'] = (
        "https://www.wunderground.com/history/airport/LLBG/{year}/{month}/1/MonthlyHistory.html?&reqdb.zip=&reqdb.magic=&reqdb.wmo="
    ).format(month=month, year=year)
    my_request['headers'] = HAR_to_dict(
        manual_requests[0]['request']['headers'])
    my_request['cookies'] = HAR_to_dict(
        manual_requests[0]['request']['cookies'])

    reply = requests.get(url=my_request['url'],
                         headers=my_request['headers'],
                         cookies=my_request['cookies'])
    tree = html.fromstring(reply.content)

    # The table header carries the year, the first body section the month
    # abbreviation; both must agree with what was asked for.
    page_year = tree.xpath('//*[@id="obsTable"]/thead/tr/th[1]')[0].text
    page_month = tree.xpath('//*[@id="obsTable"]/tbody[1]/tr/td[1]')[0].text
    if not (month == strptime(page_month, '%b').tm_mon
            and int(page_year) == year):
        print("Error. Got wrong month/year ({page_month}/{page_year}) instead of {month}/{year}".format(
            month=month, year=year, page_year=page_year, page_month=page_month))
        return None

    fields = [
        'Day',
        'Temp_H', 'Temp_A', 'Temp_M',
        'DewPoint_H', 'DewPoint_A', 'DewPoint_M',
        'Humidity_H', 'Humidity_A', 'Humidity_M',
        'SeaLevelPress_H', 'SeaLevelPress_A', 'SeaLevelPress_M',
        'Visibility_H', 'Visibility_A', 'Visibility_M',
        'Wind_H', 'Wind_A', 'Wind_M',
        'Precip',
        'Events',
    ]

    # Day rows start at the second body section and continue until a
    # section with no cells is reached.
    data = {}
    section = 1
    while True:
        section += 1
        cells = tree.xpath(
            '//*[@id="obsTable"]/tbody[{row}]/tr/td'.format(row=section))
        if not cells:
            return data

        columns = [cell.text_content().strip() for cell in cells]
        date_key = "{year}/{month}/{day}".format(year=year,
                                                 month=month,
                                                 day=columns[0])
        data[date_key] = dict(zip(fields, columns))
Exemple #3
0
def testing_tase_data():
    """Scratch experiments for pulling TASE index history data.

    Two attempts live in this function:

    1. CSV attempt: loads the recorded request from
       'HARS/tase_csv_request.json', reads its URL through ``csv.reader`` and
       prints every row, then terminates the process with ``sys.exit()``.
    2. HTML attempt: parses a cached 'response.html' (or POSTs the recorded
       request when ``cache_flag`` is False) and prints the result of an
       xpath query. It is never reached because of the ``sys.exit()`` above.

    Raises:
        SystemExit: always, once the CSV attempt has finished.
    """
    #####################################################################################################
    #trying with csv request - got stuck in the csv reader or maybe even the request itself is not good
    #####################################################################################################
    manual_requests = get_requests_from_HAR_file('HARS/tase_csv_request.json')
    my_request = {}
    my_request['url'] = manual_requests[0]['request']['url']
    my_request['headers'] = HAR_to_dict(
        manual_requests[0]['request']['headers'])
    my_request['cookies'] = HAR_to_dict(
        manual_requests[0]['request']['cookies'])
    import csv
    # NOTE(review): the recorded URL is handed to open() as if it were a
    # local path and no HTTP request is made - likely the reason this attempt
    # "got stuck". The handle is at least closed deterministically now.
    with open(my_request['url'], "rb") as csv_source:
        for row in csv.reader(csv_source):
            print(row)

    sys.exit()

    ############################################################################################
    #trying scraping the html page...doesn't work for some reason - maybe the script issue...
    ############################################################################################
    # NOTE(review): everything below is unreachable while the sys.exit()
    # above stays in place.
    manual_requests = get_requests_from_HAR_file('HARS/tase_HAR.json')
    my_request = {}
    cache_flag = True

    my_request[
        'url'] = "https://www.tase.co.il/Heb/MarketData/Indices/MarketCap/Pages/IndexHistoryData.aspx?Action=3&addTab=&IndexId=137"
    my_request['headers'] = HAR_to_dict(
        manual_requests[0]['request']['headers'])
    my_request['cookies'] = HAR_to_dict(
        manual_requests[0]['request']['cookies'])
    my_request['queryString'] = HAR_to_dict(
        manual_requests[0]['request']['queryString'])
    my_request['postData'] = manual_requests[0]['request']['postData']['text']

    html_content = None
    if (cache_flag is True):
        # Read the cached page and release the file handle right away.
        with open('response.html', 'r') as cached_page:
            html_content = cached_page.read()
    else:
        response = requests.post(
            url=my_request['url'],
            headers=my_request['headers'],
            cookies=my_request['cookies'],
            data=my_request['postData'],
            verify=False)  #, queryString=my_request['queryString'])
        html_content = response.content

    tree = html.fromstring(html_content)

    idx = 2
    while (True):
        #row_xpath='//*[@id="ctl00_SPWebPartManager1_g_54223d45_af2f_49cf_88ed_9e3db1499c51_ctl00_HistoryData1_gridHistoryData_DataGrid1"]/tbody/tr[{row}]/td[{column}]'.format(row=idx,column=7)
        # NOTE(review): this xpath does not depend on idx, so a non-empty
        # match repeats forever; the commented-out grid xpath above is the
        # row-indexed variant.
        row_xpath = '//*[@id="u1st_Skip-links"]/span[1]/a'
        #date is 7.	base madad is 6 and so on
        a = tree.xpath(row_xpath)
        print(a)
        if not a:
            break

        idx = idx + 1
Exemple #4
0
def set_my_keywords(keyword=None, os_version='android'):
    """POST a keyword to searchman.com's my_keywords_save endpoint.

    The target app is hard-coded per platform (Android:
    com.coinbase.android, iOS: 886427730, both for the US store) and the
    request replays headers captured from a browser session.

    Args:
        keyword: Value sent as the ``terms`` form field.
        os_version: Platform selector, either ``'android'`` or ``'ios'``.

    Raises:
        ValueError: if ``os_version`` is neither ``'android'`` nor ``'ios'``.
        requests.HTTPError: from ``raise_for_status()`` when the server
            answers with an error status.
    """
    # Platforms are compared by value. The previous `is 'android'` test
    # compared object identity, which only matches when the interpreter
    # happens to share the string object, and an unknown platform fell
    # through to an unbound `response`.
    if os_version not in ('android', 'ios'):
        raise ValueError(
            "os_version must be 'android' or 'ios', got {!r}".format(
                os_version))

    # NOTE(review): the Cookie values below are captured session cookies
    # kept verbatim from the recording; they are presumably expired by now.
    ios_headers_for_add_keyword_HAR = [
        {
            "name": "Cookie",
            "value": "__uvt=; _ga=GA1.2.873370462.1510860176; intercom-id-pjtwd42d=cbbd47f7-be1f-4db9-9f3a-86593757378c; mp_f9c053f6cb8aa27c2fe7abfb4847484a_mixpanel=%7B%22distinct_id%22%3A%20%2215fc65ea98426b-07d4a50c6429e1-3b3e5906-15f900-15fc65ea98510d%22%2C%22utm_source%22%3A%20%22searchman%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fsearchman.com%2Fios%2Fapp%2Fus%2F493253309%2Fen%2Fblockchain%2Fblockchain-bitcoin-wallet%2F%3Fd%3DiPhone%22%2C%22%24initial_referring_domain%22%3A%20%22searchman.com%22%7D; ag_uh=3a435e642830ef73bee7e0e0d5ed1358; ag_uhc=75b60a7055763f595e1d3e899bc578ed; ag-portfolio=%5B%22ios-886427730%22%5D; ag_bh=806719182%3AUS%2C1291851950%3AUS%2C1023123599%3AUS%2C868077558%3AUS%2Cio.voodoo.dune%3AUS%2C915637540%3AUS%2C886427730%3AUS%2C493253309%3AUS%2C; ag_lang=en; ag_public_imps=21; __utmt=1; __utma=247563269.873370462.1510860176.1511200723.1511203314.8; __utmb=247563269.6.10.1511203314; __utmc=247563269; __utmz=247563269.1510860176.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); mp_2b6156b771e3a1688ea2424a5f3e5aba_mixpanel=%7B%22distinct_id%22%3A%20%22100923%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fsearchman.com%2Fsignin%3Fnext%3D%252F%22%2C%22%24initial_referring_domain%22%3A%20%22searchman.com%22%2C%22__mps%22%3A%20%7B%7D%2C%22__mpso%22%3A%20%7B%7D%2C%22__mpus%22%3A%20%7B%7D%2C%22__mpa%22%3A%20%7B%7D%2C%22__mpu%22%3A%20%7B%7D%2C%22__mpap%22%3A%20%5B%5D%7D; uvts=6lwMqzNSEByfwHAD; __stripe_sid=a39cba9b-8469-49a0-a8fa-1c5903e0a120; __stripe_mid=2a7dca9a-3c7e-463c-ab35-80d4cb4b798e; mp_mixpanel__c=5"
        },
        {"name": "Origin", "value": "https://searchman.com"},
        {"name": "Accept-Encoding", "value": "gzip, deflate, br"},
        {"name": "Host", "value": "searchman.com"},
        {"name": "Accept-Language", "value": "en-US,en;q=0.9,he;q=0.8"},
        {
            "name": "User-Agent",
            "value": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
        },
        {
            "name": "Content-Type",
            "value": "application/x-www-form-urlencoded; charset=UTF-8"
        },
        {
            "name": "Accept",
            "value": "application/json, text/javascript, */*; q=0.01"
        },
        {
            "name": "Referer",
            "value": "https://searchman.com/ios/my_keywords/886427730/US/"
        },
        {"name": "X-Requested-With", "value": "XMLHttpRequest"},
        {"name": "Connection", "value": "keep-alive"},
        {"name": "Content-Length", "value": "31"},
    ]
    android_headers_for_add_keyword_HAR = [
        {
            "name": "Cookie",
            "value": "__uvt=; _ga=GA1.2.873370462.1510860176; intercom-id-pjtwd42d=cbbd47f7-be1f-4db9-9f3a-86593757378c; mp_f9c053f6cb8aa27c2fe7abfb4847484a_mixpanel=%7B%22distinct_id%22%3A%20%2215fc65ea98426b-07d4a50c6429e1-3b3e5906-15f900-15fc65ea98510d%22%2C%22utm_source%22%3A%20%22searchman%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fsearchman.com%2Fios%2Fapp%2Fus%2F493253309%2Fen%2Fblockchain%2Fblockchain-bitcoin-wallet%2F%3Fd%3DiPhone%22%2C%22%24initial_referring_domain%22%3A%20%22searchman.com%22%7D; ag_uh=3a435e642830ef73bee7e0e0d5ed1358; ag_uhc=75b60a7055763f595e1d3e899bc578ed; ag-portfolio=%5B%22ios-886427730%22%5D; __utmt=1; ag_public_imps=23; ag_lang=en; ag_bh=com.coinbase.android%3AUS%2C806719182%3AUS%2C1291851950%3AUS%2C1023123599%3AUS%2C868077558%3AUS%2Cio.voodoo.dune%3AUS%2C915637540%3AUS%2C886427730%3AUS%2C493253309%3AUS%2C; __utma=247563269.873370462.1510860176.1511250766.1511261486.11; __utmb=247563269.7.10.1511261486; __utmc=247563269; __utmz=247563269.1510860176.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none); mp_2b6156b771e3a1688ea2424a5f3e5aba_mixpanel=%7B%22distinct_id%22%3A%20%22100923%22%2C%22%24initial_referrer%22%3A%20%22https%3A%2F%2Fsearchman.com%2Fsignin%3Fnext%3D%252F%22%2C%22%24initial_referring_domain%22%3A%20%22searchman.com%22%2C%22__mps%22%3A%20%7B%7D%2C%22__mpso%22%3A%20%7B%7D%2C%22__mpus%22%3A%20%7B%7D%2C%22__mpa%22%3A%20%7B%7D%2C%22__mpu%22%3A%20%7B%7D%2C%22__mpap%22%3A%20%5B%5D%7D; uvts=6lwMqzNSEByfwHAD; __stripe_sid=fd046fe6-efa7-4687-83e3-81a441a00dc8; __stripe_mid=2a7dca9a-3c7e-463c-ab35-80d4cb4b798e; mp_mixpanel__c=6"
        },
        {"name": "Origin", "value": "https://searchman.com"},
        {"name": "Accept-Encoding", "value": "gzip, deflate, br"},
        {"name": "Host", "value": "searchman.com"},
        {"name": "Accept-Language", "value": "en-US,en;q=0.9,he;q=0.8"},
        {
            "name": "User-Agent",
            "value": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
        },
        {
            "name": "Content-Type",
            "value": "application/x-www-form-urlencoded; charset=UTF-8"
        },
        {
            "name": "Accept",
            "value": "application/json, text/javascript, */*; q=0.01"
        },
        {
            "name": "Referer",
            "value": "https://searchman.com/android/my_keywords/com.coinbase.android/US/"
        },
        {"name": "X-Requested-With", "value": "XMLHttpRequest"},
        {"name": "Connection", "value": "keep-alive"},
        {"name": "Content-Length", "value": "28"},
    ]

    ios_url_for_add_keyword = "https://searchman.com/ios/my_keywords_save/886427730/US"
    android_url_for_add_keyword = "https://searchman.com/android/my_keywords_save/com.coinbase.android/US"

    # Pick the endpoint and the recorded headers of the requested platform.
    if os_version == 'android':
        target_url = android_url_for_add_keyword
        recorded_headers = android_headers_for_add_keyword_HAR
    else:
        target_url = ios_url_for_add_keyword
        recorded_headers = ios_headers_for_add_keyword_HAR

    data = {'terms': keyword, 'term_type': '3'}
    response = requests.post(url=target_url,
                             headers=HAR_to_dict(recorded_headers),
                             data=data)

    #raise if response not ok
    response.raise_for_status()
    "referer",
    "value":
    "https://adwords.google.com/um/GetStarted/Home?__u=8366573661&__c=6519561002&authuser=0"
}]

data1 = "7|0|76|https://adwords.google.com/um/GetStarted/com.google.ads.apps.usermgmt.getstarted.client.main.Module/|22D5B3FA3CEBB378F43793C10A1E70DB|com.google.ads.api.gwt.rpc.client.BatchedInvocationService|invoke|com.google.ads.api.gwt.rpc.client.BatchedInvocationRequest/2983766987|com.google.ads.apps.common.shared.header.BatchRequestHeaderImpl/2595329959|java.util.HashMap/1797211028|com.google.ads.apps.common.shared.header.ApiHeaderType/3992732687|com.google.ads.apps.common.shared.header.BatchAdsApiRequestHeaderInfo/1561664655|com.google.ads.api.modules.request.headers.GrubbyHeader$ChangeIdMode/272930539|com.google.ads.apps.common.usagetracking.server.UsageTrackingService.logImpression|rGRQNhESwLJgUXz8hBjKCfxyUFw:1510910684674|java.util.ArrayList/4159755760|com.google.ads.apps.common.usagetracking.shared.UsageTrackingServiceGwt$ImpressionRequest/3960782202|com.google.ads.apps.common.shared.header.SingleAdsApiRequestHeader/4098801396|com.google.ads.apps.common.shared.header.ClientCacheHint/2402802613|com.google.ads.apps.common.shared.header.ServerCacheHint/3129959624|java.lang.Boolean/476441737|com.google.ads.api.modules.request.headers.ApiVersion/450371163|com.google.ads.api.modules.request.headers.GrubbyHeader$CustomerIdMode/45453300|com.google.ads.api.modules.request.headers.GrubbyHeader$DatabaseReadMode/1150601902|com.google.common.collect.RegularImmutableList/440499227|com.google.ads.common.logging.MetricEntries$ImpressionEntry/454587110|[Lcom.google.ads.common.logging.ApexExperimentMetrics;/1267822276|com.google.ads.common.logging.ApexExperimentMetrics/1204991806|TREATMENT|java.lang.String/2004016611|AWSM|AWN_INTERNALOPS|MCC|NOTIFICATIONS|UM|ADWORDS_NEXT_BILLING||CUES|ADWORDS_NEXT_MCC|ADWORDS_NEXT_INTERNALOPS|ADWORDS_NEXT_ACCESS_TO_ALL|TREATMENT_SIGNUP_FLOW_CLICKS_WITH_BADGES|CM|CM_GROWTH_MOBILE_PROMO|CT|PRIME|AWN_PRIME|AWN_CM|ADWORDS_NEXT|ADWORDS_NEXT_NEW_CUSTOMERS|KP|ADWORDS_NEXT_KEYWORD_PLANNER|https://adwords.google.com/um/GetStarted/Home?__u=8366573661
&__c=6519561002&authuser=0#oc|[Lcom.google.ads.common.logging.ExperimentMetrics;/2152658160|com.google.ads.common.logging.ExperimentMetrics/1213839764|enable-call-consent|com.google.common.collect.RegularImmutableMap/1085455152|remove-progress-bar|enable-appstore|orinoco-megablox|get-started-w-logo|show-estimated-reach-panel|expanded-text-ad|youtube-linked-accounts|show-top-ad-preview|enable-goldmine-auto-expand|u2-migration|default-opt-in|billing-all-countries|enable-auto-expand|rewire-guided-orinoco-billing-for-budgets-in-ads|mobile-compatible-orinoco|policy-certificate-expiration|target-cpa-suggestion|electrum-account-linking-ui|[Ljava.lang.String;/2600011424|Orinoco.oc.keywords-editor-keywords-input-focused|oc.keywords-editor-keywords-input-focused|com.google.ads.apps.common.uimode.shared.UiMode/4208379950|1|2|3|4|1|5|5|6|quRMuCHZo|45|7|1|8|0|9|A|10|2|GEmJsq|Hyr8hd|11|12|13|1|14|15|16|0|0|17|13|0|18|0|18|1|0|0|19|6|0|A|0|0|0|0|0|0|0|0|20|0|21|1|0|0|0|0|0|1|1|0|0|0|0|0|0|0|0|0|0|0|0|22|0|0|0|0|7|0|0|0|0|23|24|8|25|0|1|26|22|5|27|28|27|29|27|30|27|31|27|32|33|0|252|34|FUlReeu8O|25|0|1|26|22|3|-24|27|35|-28|36|0|252|34|FWguhina5|25|0|1|26|22|3|-24|-31|-28|37|0|252|34|FW9cs8QQc|25|0|1|26|22|3|-24|-31|-28|38|0|749|34|FW9cs8QQc|25|0|1|39|22|2|27|40|-28|41|0|340|34|FV4L2$Qzk|25|0|0|34|22|9|-24|-31|-28|27|42|27|43|27|44|-25|27|45|-38|46|0|252|34|FWzMJiJ_3|25|0|1|26|22|3|-24|-31|-28|47|0|252|34|FWzhhJjZK|25|0|0|34|22|3|-24|-28|27|48|49|0|252|34|FW1uiRGFm|50|0|51|19|52|53|54|0|1|1|1|52|55|-52|1|1|1|52|56|-52|1|1|1|52|57|-52|1|1|1|52|58|-52|1|1|1|52|59|-52|1|1|1|52|60|-52|1|1|1|52|61|-52|1|1|0|52|62|-52|1|1|1|52|63|-52|1|1|1|52|64|-52|1|1|1|52|65|-52|1|1|1|52|66|-52|1|1|1|52|67|-52|1|1|1|52|68|-52|1|1|1|52|69|-52|1|1|1|52|70|-52|1|1|1|52|71|-52|1|1|1|52|72|-52|1|1|1|V_JTowC|73|0|-1|74|73|1|75|76|0|"

url1 = "https://adwords.google.com/um/GetStarted/g?authuser=0&__u=8366573661&__c=6519561002"

# response = requests.post(url1,headers=HAR_to_dict(headers1),data=data1)
# print "RESPOSNE 1:"
# print json.dumps(response.content,indent=4)

# Replay every request recorded in HAR_req.json, in order, then dump the
# body of each reply.
with open("HAR_req.json", 'r') as har_file:
    requests_dict = json.load(har_file)
my_requests = requests_dict['log']['entries']
responses = []

for request in my_requests:
    url = request['request']['url']
    headers = HAR_to_dict(request['request']['headers'])
    method = request['request']['method']
    if method == "POST":
        # 'postData' is the HAR wrapper object (mimeType / text / params);
        # the body that was actually sent is its 'text' member. Passing the
        # whole wrapper would form-encode the wrapper's own keys instead.
        data = request['request']['postData']['text']
        responses.append(requests.post(url=url, headers=headers, data=data))
    elif method == "GET":
        responses.append(requests.get(url=url, headers=headers))
    # Entries with any other method are skipped.

for response in responses:
    print("RESPONSE:")
    print(response.content)
Exemple #6
0
def get_weather_custom_data(start_date, end_date):
    """Scrape daily weather rows for airport LLBG over a custom date range.

    The wunderground CustomHistory page is fetched with the headers and
    cookies of the first recorded request in the module-level
    ``manual_requests``; the request details are stored in the module-level
    ``my_request`` dict. Every day row is enriched with the averages
    returned by ``get_weather_daily_data`` and its 'Events' text is passed
    through ``handle_events_string``.

    Args:
        start_date: First day of the range; its ``year``, ``month`` and
            ``day`` attributes are placed in the URL path.
        end_date: Last day of the range; its ``year``, ``month`` and ``day``
            attributes are placed in the query string.

    Returns:
        A dict keyed by "year/month/day" (two-digit month) whose values map
        the names in ``fields`` to cell text, plus 'ConditionsScore' and
        'CloudsScore'.
    """
    first_day = "{0.year}/{0.month}/{0.day}".format(start_date)
    my_request['url'] = (
        "https://www.wunderground.com/history/airport/LLBG/{start_date_str}/CustomHistory.html?dayend={end_day}&monthend={end_month}&yearend={end_year}&req_city=&req_state=&req_statename=&reqdb.zip=&reqdb.magic=&reqdb.wmo="
    ).format(start_date_str=first_day,
             end_year=end_date.year,
             end_month=end_date.month,
             end_day=end_date.day)
    my_request['headers'] = HAR_to_dict(
        manual_requests[0]['request']['headers'])
    my_request['cookies'] = HAR_to_dict(
        manual_requests[0]['request']['cookies'])

    reply = requests.get(url=my_request['url'],
                         headers=my_request['headers'],
                         cookies=my_request['cookies'])
    tree = html.fromstring(reply.content)

    fields = [
        'Day',
        'Temp_H', 'Temp_A', 'Temp_M',
        'DewPoint_H', 'DewPoint_A', 'DewPoint_M',
        'Humidity_H', 'Humidity_A', 'Humidity_M',
        'SeaLevelPress_H', 'SeaLevelPress_A', 'SeaLevelPress_M',
        'Visibility_H', 'Visibility_A', 'Visibility_M',
        'Wind_H', 'Wind_A', 'Wind_M',
        'Precip',
        'Events',
    ]

    # Month abbreviation -> two-digit month number ('Jan' -> '01', ...).
    month_numbers = {}
    for position, abbreviation in enumerate(
            ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'), 1):
        month_numbers[abbreviation] = '{:02d}'.format(position)

    data = {}
    year_now = start_date.year
    month_now = None
    # Set once a 'Dec' header was seen: the next month header starts a new
    # year.
    year_change_pending = False
    section = 0
    while True:
        section += 1
        cells = tree.xpath(
            '//*[@id="obsTable"]/tbody[{row}]/tr/td'.format(row=section))
        if not cells:
            return data

        columns = [cell.text_content().strip() for cell in cells]
        label = columns[0]

        # A section whose first cell is a month abbreviation is a month
        # header, not a day row.
        if label in month_numbers:
            if year_change_pending:
                year_now += 1
            year_change_pending = (label == 'Dec')
            month_now = month_numbers[label]
            continue

        date_key = "{year}/{month}/{day}".format(year=year_now,
                                                 month=month_now,
                                                 day=label)
        record = dict(zip(fields, columns))
        data[date_key] = record

        daily_scores = get_weather_daily_data(date_key)
        record['ConditionsScore'] = daily_scores['cond_code']
        record['CloudsScore'] = daily_scores['cond_str_code']

        # Strip tabs and newlines and turn commas into semicolons before the
        # events text is interpreted.
        events_text = record['Events']
        for found, substitute in (("\t", ""), ("\n", ""), (",", ";")):
            events_text = events_text.replace(found, substitute)
        record['Events'] = handle_events_string(events_text)
Exemple #7
0
def get_weather_daily_data(date_str):
    """Scrape one day of observations for airport LLBG and average its scores.

    The wunderground DailyHistory page is fetched with the headers and
    cookies of the first recorded request in the module-level
    ``manual_requests``; the request details are stored in the module-level
    ``my_request`` dict.

    Observations are read from every second table row (1, 3, 5, ...); the
    second cell of the row that follows each of them supplies the conditions
    text handed to ``handle_conditions_string``.

    Args:
        date_str: Date path segment placed in the URL (callers in this file
            pass "year/month/day").

    Returns:
        A dict with two averages taken over the observations whose
        conditions code is not ``None``:
        ``'cond_str_code'`` (mean of those codes) and ``'cond_code'`` (mean
        of ``handle_conditions_column_string`` applied to the 'Conditions'
        column). Both are ``None`` when no observation has a code.
    """
    my_request[
        'url'] = 'https://www.wunderground.com/history/airport/LLBG/{date_str}/DailyHistory.html'.format(
            date_str=date_str)
    my_request['headers'] = HAR_to_dict(
        manual_requests[0]['request']['headers'])
    my_request['cookies'] = HAR_to_dict(
        manual_requests[0]['request']['cookies'])

    response = requests.get(url=my_request['url'],
                            headers=my_request['headers'],
                            cookies=my_request['cookies'])
    tree = html.fromstring(response.content)

    # Column titles come straight from the table header, so the per-row
    # dicts are keyed by whatever the page calls its columns.
    fields_titles = [
        th.text_content().strip()
        for th in tree.xpath('//*[@id="obsTable"]/thead/tr/th')
    ]

    #key: value of the first column, value: dict of that row's data
    daily_data = {}
    row_num = 1
    while (tree.xpath('//*[@id="obsTable"]/tbody/tr[{row_num}]'.format(
            row_num=row_num))):
        data = {}

        for col, field in enumerate(fields_titles, 1):
            cell_xpath = '//*[@id="obsTable"]/tbody/tr[{row}]/td[{col}]'.format(
                row=row_num, col=col)
            # Prefer the nested span holding the value; fall back to the
            # cell's own text when the cell has no such span.
            nested = tree.xpath(cell_xpath + '/span/span[1]')
            if nested:
                data[field] = nested[0].text
            else:
                data[field] = tree.xpath(cell_xpath)[0].text

        #handle conditions string
        conditions_string = tree.xpath(
            '//*[@id="obsTable"]/tbody/tr[{row}]/td[2]'.format(
                row=row_num + 1))[0].text

        if data['Conditions'] != "Clear":
            code = handle_conditions_string(conditions_string)
        else:
            code = 0

        data['conditions_details'] = {'code': code, 'str': conditions_string}

        daily_data[data[fields_titles[0]]] = data
        row_num = row_num + 2

    #calculate conditions average
    scored = [
        observation for observation in daily_data.values()
        if observation['conditions_details']['code'] is not None
    ]
    if not scored:
        # Nothing to average: report "no score" instead of dividing by zero.
        return {'cond_str_code': None, 'cond_code': None}

    # Start the sums at 0.0 so the division below is a float division on
    # Python 2 as well.
    sum_str_code = sum(
        (observation['conditions_details']['code'] for observation in scored),
        0.0)
    sum_cond_code = sum(
        (handle_conditions_column_string(observation['Conditions'])
         for observation in scored),
        0.0)
    scored_count = len(scored)

    return {
        'cond_str_code': sum_str_code / scored_count,
        'cond_code': sum_cond_code / scored_count
    }
Exemple #8
0
def lambda_handler(event, context):
    #check offline or online
    auth_flag = True
    write_to_file_flag = False
    return_dict = {}

    #authenticate
    auth_url = "http://www.kolnoapeer.co.il/wp-content/themes/KolnoaPeer/inc/physical/login-handle.php"
    auth_headers_HAR = [{
        "name": "Origin",
        "value": "http://www.kolnoapeer.co.il"
    }, {
        "name": "Accept-Encoding",
        "value": "gzip, deflate"
    }, {
        "name": "Host",
        "value": "www.kolnoapeer.co.il"
    }, {
        "name": "Accept-Language",
        "value": "en-US,en;q=0.9,he;q=0.8"
    }, {
        "name":
        "User-Agent",
        "value":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
    }, {
        "name":
        "Content-Type",
        "value":
        "application/x-www-form-urlencoded; charset=UTF-8"
    }, {
        "name": "Accept",
        "value": "*/*"
    }, {
        "name": "Referer",
        "value": "http://www.kolnoapeer.co.il/"
    }, {
        "name":
        "Cookie",
        "value":
        "optimizelyEndUserId=oeu1512829976741r0.20758536549030615; _hjIncludedInSample=1; optimizelySegments=%7B%225493192264%22%3A%22gc%22%2C%225520510180%22%3A%22false%22%2C%225515770230%22%3A%22search%22%7D; optimizelyBuckets=%7B%7D; _ga=GA1.3.644062575.1512829977; _gid=GA1.3.783463494.1513163597; _gat=1"
    }, {
        "name": "Connection",
        "value": "keep-alive"
    }, {
        "name": "Content-Length",
        "value": "38"
    }]
    auth_headers = HAR_to_dict(auth_headers_HAR)
    auth_cookies_HAR = [{
        "name": "_ga",
        "value": "GA1.3.644062575.1512829977",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_gat",
        "value": "1",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_gid",
        "value": "GA1.3.783463494.1513163597",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_hjIncludedInSample",
        "value": "1",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "optimizelyBuckets",
        "value": "%7B%7D",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "optimizelyEndUserId",
        "value": "oeu1512829976741r0.20758536549030615",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "optimizelySegments",
        "value":
        "%7B%225493192264%22%3A%22gc%22%2C%225520510180%22%3A%22false%22%2C%225515770230%22%3A%22search%22%7D",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }]
    auth_cookies = HAR_to_dict(auth_cookies_HAR)
    auth_data = {
        "mimeType":
        "application/x-www-form-urlencoded; charset=UTF-8",
        "text":
        "userEmail=0507609679&userPass=12345678",
        "params": [{
            "name": "userEmail",
            "value": "0507609679"
        }, {
            "name": "userPass",
            "value": "12345678"
        }]
    }
    auth_data = "userEmail=0507609679&userPass=12345678"

    if (auth_flag):
        response = requests.post(url=auth_url,
                                 headers=auth_headers,
                                 data=auth_data)  #,cookies=auth_cookies)
        auth_response_json = json.loads(response.content)
        session_token = auth_response_json['user_token']
    else:
        session_token = None

    #get weekly sched
    headers_HAR = [{
        "name":
        "Cookie",
        "value":
        "optimizelyEndUserId=oeu1511636150491r0.03070219004762409; _hjIncludedInSample=1; peerUserLogged=true; peerUserID=14611; peerToken=3b5279c1775d41cabe6c0db93b0d7761; peerUserFirstName=%D7%90%D7%9C%D7%94; peerUserLastName=%D7%90%D7%9C%D7%A4%D7%A1%D7%99; peerUserEmail=alfasi21%40hotmail.com; peerUserPhone=%20; peerUserDetails=%7B%22status%22%3A%221%22%2C%22userid%22%3A14611%2C%22user_p_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22user_token%22%3A%2272c424c0446f42f89bd3d5f4784fb391%22%2C%22user_l_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22user_info%22%3A%7B%22CompanyID%22%3A201%2C%22BranchID%22%3A1%2C%22ID%22%3A14611%2C%22FirstName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22LastName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22MobilePhone%22%3A%22050-7609679%22%2C%22HomePhone%22%3A%22%20%22%2C%22WorkPhone%22%3A%22%20%22%2C%22CardNumber%22%3A0%2C%22Email%22%3A%22alfasi21%40hotmail.com%22%2C%22DateOfBirth%22%3A%221986-07-28T00%3A00%3A00%22%2C%22SignedRegulations%22%3Afalse%2C%22SignedRetulationsInt%22%3Anull%2C%22HasMedical%22%3Afalse%2C%22PaymentLeft%22%3A0%2C%22HasTrainingPlan%22%3Afalse%2C%22IsInsured%22%3Afalse%2C%22InsuranceEndDate%22%3Anull%2C%22CityName%22%3A%22%5Cu05ea%5Cu05dc%20%5Cu05d0%5Cu05d1%5Cu05d9%5Cu05d1%22%2C%22HouseNumber%22%3A0%2C%22GroupCode%22%3A0%2C%22IDNumber%22%3A21967583%2C%22NeighborhoodCode%22%3Anull%2C%22Cars%22%3A%7B%7D%7D%7D; optimizelySegments=%7B%225493192264%22%3A%22gc%22%2C%225520510180%22%3A%22false%22%2C%225515770230%22%3A%22search%22%7D; optimizelyBuckets=%7B%7D; _ga=GA1.3.462259338.1511636151; _gid=GA1.3.185880343.1511636151; _gat_UA-37156680-1=1"
    }, {
        "name": "Origin",
        "value": "http://www.kolnoapeer.co.il"
    }, {
        "name": "Accept-Encoding",
        "value": "gzip, deflate"
    }, {
        "name": "Host",
        "value": "www.kolnoapeer.co.il"
    }, {
        "name": "Accept-Language",
        "value": "en-US,en;q=0.9,he;q=0.8"
    }, {
        "name":
        "User-Agent",
        "value":
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36"
    }, {
        "name": "Content-Type",
        "value": "application/x-www-form-urlencoded; charset=UTF-8"
    }, {
        "name": "Accept",
        "value": "*/*"
    }, {
        "name":
        "Referer",
        "value":
        "http://www.kolnoapeer.co.il/%d7%9e%d7%a2%d7%a8%d7%9b%d7%aa-%d7%97%d7%95%d7%92%d7%99%d7%9d-%d7%a9%d7%91%d7%95%d7%a2%d7%99/"
    }, {
        "name": "X-Requested-With",
        "value": "XMLHttpRequest"
    }, {
        "name": "Proxy-Connection",
        "value": "keep-alive"
    }, {
        "name": "Content-Length",
        "value": "211"
    }]
    headers = HAR_to_dict(headers_HAR)

    #indicates which week to take from calander. [0/1] for [current/next week]. important to scheduling Sunday classes
    week_flag = 0
    url = "http://www.kolnoapeer.co.il/wp-content/themes/KolnoaPeer/inc/physical/weeklySched.php?week={week_flag}".format(
        week_flag=week_flag)

    data = {
        "mimeType":
        "application/x-www-form-urlencoded; charset=UTF-8",
        "text":
        "site_url=&permalink=http%3A%2F%2Fwww.kolnoapeer.co.il%2F%25d7%259e%25d7%25a2%25d7%25a8%25d7%259b%25d7%25aa-%25d7%2597%25d7%2595%25d7%2592%25d7%2599%25d7%259d-%25d7%25a9%25d7%2591%25d7%2595%25d7%25a2%25d7%2599%2F",
        "params": [{
            "name": "site_url",
            "value": ""
        }, {
            "name":
            "permalink",
            "value":
            "http%3A%2F%2Fwww.kolnoapeer.co.il%2F%25d7%259e%25d7%25a2%25d7%25a8%25d7%259b%25d7%25aa-%25d7%2597%25d7%2595%25d7%2592%25d7%2599%25d7%259d-%25d7%25a9%25d7%2591%25d7%2595%25d7%25a2%25d7%2599%2F"
        }]
    }

    cookies_HAR = [{
        "name": "optimizelyEndUserId",
        "value": "oeu1512829976741r0.20758536549030615",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_gat",
        "value": "1",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_hjIncludedInSample",
        "value": "1",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "optimizelySegments",
        "value":
        "%7B%225493192264%22%3A%22gc%22%2C%225520510180%22%3A%22false%22%2C%225515770230%22%3A%22search%22%7D",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "optimizelyBuckets",
        "value": "%7B%7D",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_ga",
        "value": "GA1.3.644062575.1512829977",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "_gid",
        "value": "GA1.3.783463494.1513163597",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserLogged",
        "value": "true",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserID",
        "value": "14611",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerToken",
        "value": "3b5279c1775d41cabe6c0db93b0d7761",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserFirstName",
        "value": "%D7%90%D7%9C%D7%94",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserLastName",
        "value": "%D7%90%D7%9C%D7%A4%D7%A1%D7%99",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserEmail",
        "value": "alfasi21%40hotmail.com",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserPhone",
        "value": "%20",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }, {
        "name": "peerUserDetails",
        "value":
        "%7B%22status%22%3A%221%22%2C%22userid%22%3A14611%2C%22user_p_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22user_token%22%3A%223b5279c1775d41cabe6c0db93b0d7761%22%2C%22user_l_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22user_info%22%3A%7B%22CompanyID%22%3A201%2C%22BranchID%22%3A1%2C%22ID%22%3A14611%2C%22FirstName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22LastName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22MobilePhone%22%3A%22050-7609679%22%2C%22HomePhone%22%3A%22%20%22%2C%22WorkPhone%22%3A%22%20%22%2C%22CardNumber%22%3A0%2C%22Email%22%3A%22alfasi21%40hotmail.com%22%2C%22DateOfBirth%22%3A%221986-07-28T00%3A00%3A00%22%2C%22SignedRegulations%22%3Afalse%2C%22SignedRetulationsInt%22%3Anull%2C%22HasMedical%22%3Afalse%2C%22PaymentLeft%22%3A0%2C%22HasTrainingPlan%22%3Afalse%2C%22IsInsured%22%3Afalse%2C%22InsuranceEndDate%22%3Anull%2C%22CityName%22%3A%22%5Cu05ea%5Cu05dc%20%5Cu05d0%5Cu05d1%5Cu05d9%5Cu05d1%22%2C%22HouseNumber%22%3A0%2C%22GroupCode%22%3A0%2C%22IDNumber%22%3A21967583%2C%22ZipCode%22%3A0%2C%22NeighborhoodCode%22%3Anull%2C%22Cars%22%3A%7B%7D%7D%7D",
        "expires": None,
        "httpOnly": False,
        "secure": False
    }]
    cookies = HAR_to_dict(cookies_HAR)

    #   The request to auth_url returns a JSON with the user_token;
    #   we take the token and inject it into our template cookie and the header's Cookie field.
    cookies['peerToken'] = session_token
    headers[
        'Cookie'] = "optimizelyEndUserId=oeu1511636150491r0.03070219004762409; _hjIncludedInSample=1; peerUserLogged=true; peerUserID=14611; peerToken={session_token}; peerUserFirstName=%D7%90%D7%9C%D7%94; peerUserLastName=%D7%90%D7%9C%D7%A4%D7%A1%D7%99; peerUserEmail=alfasi21%40hotmail.com; peerUserPhone=%20; peerUserDetails=%7B%22status%22%3A%221%22%2C%22userid%22%3A14611%2C%22user_p_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22user_token%22%3A%2272c424c0446f42f89bd3d5f4784fb391%22%2C%22user_l_name%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22user_info%22%3A%7B%22CompanyID%22%3A201%2C%22BranchID%22%3A1%2C%22ID%22%3A14611%2C%22FirstName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05d4%22%2C%22LastName%22%3A%22%5Cu05d0%5Cu05dc%5Cu05e4%5Cu05e1%5Cu05d9%22%2C%22MobilePhone%22%3A%22050-7609679%22%2C%22HomePhone%22%3A%22%20%22%2C%22WorkPhone%22%3A%22%20%22%2C%22CardNumber%22%3A0%2C%22Email%22%3A%22alfasi21%40hotmail.com%22%2C%22DateOfBirth%22%3A%221986-07-28T00%3A00%3A00%22%2C%22SignedRegulations%22%3Afalse%2C%22SignedRetulationsInt%22%3Anull%2C%22HasMedical%22%3Afalse%2C%22PaymentLeft%22%3A0%2C%22HasTrainingPlan%22%3Afalse%2C%22IsInsured%22%3Afalse%2C%22InsuranceEndDate%22%3Anull%2C%22CityName%22%3A%22%5Cu05ea%5Cu05dc%20%5Cu05d0%5Cu05d1%5Cu05d9%5Cu05d1%22%2C%22HouseNumber%22%3A0%2C%22GroupCode%22%3A0%2C%22IDNumber%22%3A21967583%2C%22NeighborhoodCode%22%3Anull%2C%22Cars%22%3A%7B%7D%7D%7D; optimizelySegments=%7B%225493192264%22%3A%22gc%22%2C%225520510180%22%3A%22false%22%2C%225515770230%22%3A%22search%22%7D; optimizelyBuckets=%7B%7D; _ga=GA1.3.462259338.1511636151; _gid=GA1.3.185880343.1511636151; _gat_UA-37156680-1=1".format(
            session_token=session_token)

    #get weekly sched
    if (auth_flag):
        response = requests.post(url=url,
                                 headers=headers,
                                 data=data,
                                 cookies=cookies)
        if (write_to_file_flag):
            weekly_sched_response_file = open('response.html', 'w')
            weekly_sched_response_file.write(response.content)
            weekly_sched_response_file.close()

    html_prefix = '<!DOCTYPE html><html lang="en" dir="ltr" class="com"><head>dsd</head><body>'
    html_suffix = '</body></html>'
    if (write_to_file_flag):
        with open("response.html") as weekly_sched_response_file:
            data = weekly_sched_response_file.read()
    else:
        data = response.content
    tree = html.fromstring(html_prefix + data + html_suffix)

    ###############################################################
    #xpath Examples:

    #xpath for all classes in specific day (1-7)
    #xpath = '/html/body/div/div[2]/article[6]/div'

    #xpath for the 8th class - for the number of seats section
    #xpath = '/html/body/div/div[2]/article[5]/div[8]/section[2]'

    #xpath for the 2nd lesson - for the lesson name
    #xpath = '/html/body/div/div[2]/article[6]/div[2]/h4'
    ###############################################################

    #get all classes of DAY_NUMBER
    i = 0
    week_day = (datetime.datetime.today().weekday() + 3) % 7
    if week_day is 0:
        week_day = 7

    xpath = '/html/body/div/div[2]/article[{week_day}]/div'.format(
        week_day=week_day)
    lessons = tree.xpath(xpath)

    #looping over all lessons for tomorrow
    while (True):
        lesson = lessons[i]
        i = i + 1
        xpath = '/html/body/div/div[2]/article[{week_day}]/div[{lesson_number}]/h4'.format(
            week_day=week_day, lesson_number=i)
        lesson_name = tree.xpath(xpath)[0].text
        #xpath = '/html/body/div/div[2]/article[{week_day}]/div[{lesson_number}]/section[2]'.format(week_day=week_day,lesson_number=i)
        #lesson_status = tree.xpath(xpath)[0].text

        date = lesson.attrib['data-date']
        xdate = urllib.quote_plus(date)
        hour = lesson.attrib['data-hour']
        xhour = urllib.quote_plus(hour)
        lessonID = lesson.attrib['data-classid']
        xlessonID = urllib.quote_plus(lessonID)
        instructor = lesson.attrib['data-instructor'].encode('UTF-8')
        xinstructor = urllib.quote_plus(instructor)
        if lesson_name in ['Kickboxing', 'Hiit Trx']:
            return_dict['lesson'] = lesson_name
            break

    #attrib = {'data-classid': '81', 'data-dur': '50', 'data-hour': '073000', 'data-date': '2017-12-14T07:30:00+02:00', 'data-instructor': u'\xd7\x9c\xd7\x99\xd7\xa8\xd7\x95\xd7\x9f \xd7\x9c.', 'class': 'one-course '}
    #<div class="one-course " data-classid="385" data-date="2017-12-15T09:00:00+02:00" data-hour="090000" data-dur="90" data-instructor="לינור מ.">

    #sign up to class
    request = {
        "method": "POST",
        "url":
        "http://www.kolnoapeer.co.il/wp-content/themes/KolnoaPeer/inc/physical/signToClass.php",
        "httpVersion": "HTTP/1.1",
        "headers": "ERASED",
        "queryString": [],
        "cookies": "ERASED",
        "headersSize": 2234,
        "bodySize": 114,
        "postData": {
            "mimeType":
            "application/x-www-form-urlencoded; charset=UTF-8",
            "text":
            "date=2017-12-15T09%3A00%3A00%2B02%3A00&hour=090000&lessonID=385&myInstructor=%D7%9C%D7%99%D7%A0%D7%95%D7%A8%20%D7%9E.",
            "params": [{
                "name": "date",
                "value": "2017-12-15T09%3A00%3A00%2B02%3A00"
            }, {
                "name": "hour",
                "value": "090000"
            }, {
                "name": "lessonID",
                "value": "385"
            }, {
                "name": "myInstructor",
                "value": "%D7%9C%D7%99%D7%A0%D7%95%D7%A8%20%D7%9E."
            }]
        }
    }
    url = request['url']
    data = "date={date}&hour={hour}&lessonID={lessonID}&myInstructor={myInstructor}".format(
        date=xdate, hour=xhour, lessonID=xlessonID, myInstructor=xinstructor)
    return_dict['form_data'] = data

    response = requests.post(url=url,
                             headers=headers,
                             cookies=cookies,
                             data=data)
    return_dict['response_content'] = response.content
    return_dict['response_status_code'] = response.status_code
    #if response.content == 'success' and response.status_code is 200:
    #    return "signup for class succeed"
    #else:
    return return_dict