Example #1
def parse_yahoo_stk_hist(url):
    try:
        name = url.split("=")[1].split(".")[0]
        # print("Name = ", name)
        bs = h.parse_url(url)
        if bs:
            table = bs.find('div', {'class': "Pb(10px) Ovx(a) W(100%)"}).find_all("table", {"class": "W(100%) M(0)"})[0]
            data = [
                       [td.string.strip() for td in tr.find_all('td') if td.string]
                       for tr in table.find_all('tr')[2:]
                   ][:-1]
            # print(data)
            # data.insert(0, ['STK_DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'ACLOSE', 'VOLUME'])
            df = get_data_frame(data, name)
            if len(df) > 0:
                df_columns = list(df)
                table = "STK_INFO_HISTORY"
                constraint = ', '.join(['STK_DATE', 'NSE_CODE'])
                values = "to_date(%s, 'DD-MON-YYYY'), %s, %s, %s, %s, %s, %s"
                insert_stmt = h.create_update_query(table, df_columns, values, constraint)
                conn = psycopg2.connect(database="trading",
                                        user="******",
                                        password="******")
                conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
                cursor = conn.cursor()
                execute_batch(cursor, insert_stmt, df.values)
                conn.commit()
                db.close_connection(conn, cursor)
    except Exception as err:
        traceback.print_exc()
        print("Exception = ", str(err))
Example #2
def get_shares_details(stock_url, thread_count):
    # Variables declaration
    results_que = Queue()
    failed_que = Queue()

    start = time.time()
    # Get the shares from money control
    shares = get_shrs_from_mnctl(stock_url)
    log.info("Total number of shares returned = {}".format(len(shares)))
    # shares = {k: shares[k] for k in list(shares)[:50]}
    if not shares:
        log.warning("No shares returned from money control; nothing to process")
        return
    # put into Queue
    url_que = get_shares_category(shares)
    log.info("Shares added to Queue to process...")

    for i in range(thread_count):
        t = threading.Thread(target=process_queue,
                             args=(url_que, results_que, failed_que))
        t.daemon = True
        t.start()

    url_que.join()
    log.info("Failed url count = {}".format(failed_que.qsize()))
    log.info("Success url count = {}".format(results_que.qsize()))

    while not failed_que.empty():
        log.warning("Failed URL details = {}".format(failed_que.get()))

    final_data = {}
    while not results_que.empty():
        # final_data.append(results_que.get())
        tmp_dict = results_que.get()
        key = tmp_dict.get("CATEGORY")
        h.upd_dic_with_sub_list(key, tmp_dict, final_data)
    pd.set_option('display.max_columns', 15)
    for category in final_data:
        cat_up = category.upper()
        print("CATEGORY = {} and count = {}".format(cat_up,
                                                    len(final_data[category])))
        df = pd.DataFrame(final_data[category])
        df = df.set_index("NAME")
        # Slice it as needed
        sliced_df = df.loc[:, [
            'MARKET CAP (Rs Cr)', 'EPS (TTM)', 'P/E', 'INDUSTRY P/E',
            'BOOK VALUE (Rs)', 'FACE VALUE (Rs)', 'DIV YIELD.(%)'
        ]]
        sliced_df = sliced_df.apply(pd.to_numeric, errors='ignore')
        sorted_df = sliced_df.sort_values(by=['EPS (TTM)', 'P/E'],
                                          ascending=[False, False])
        writer_orig = pd.ExcelWriter(os.path.join(
            commons.get_prop('base-path', 'output'),
            cat_up + '_Listings.xlsx'),
                                     engine='xlsxwriter')
        sorted_df.to_excel(writer_orig, index=True, sheet_name='report')
        writer_orig.save()

    print("Execution time = {0:.5f}".format(time.time() - start))
Example #3
def get_ipo_details_perf(url):

    ipo_details = {}
    try:
        bs = h.parse_url(url)
        print("URL = ", url)
        if bs:
            table = bs.find("table",
                            {"class": "table table-bordered table-condensed"})
            rows = table.findAll("tr")
            row = 1
            while row < len(rows):
                if row % 2 == 1:
                    cols = rows[row].find_all('td')
                    i = 0
                    ele_list = []
                    __key = None
                    for ele in cols:
                        i += 1
                        if i == 1: __key = ele.text.strip()
                        __val = ele.text.strip()
                        if i == 4:
                            # date massage
                            day_list = __val.split(" ")
                            if len(day_list) == 3:
                                if len(day_list[1].strip(",")) == 1:
                                    day_list[1] = '0' + day_list[1].strip(",")
                                else:
                                    day_list[1] = day_list[1].strip(",")
                                __val = "-".join(day_list)
                            else:
                                __val = "-".join(['Jan', '01', '1900'])
                        if not (i == 2 or i == 3):
                            ele_list.append(__val)
                    ipo_details[__key] = ele_list

                elif row % 2 == 0:
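                    # even rows hold the listing-day panel for the company
                    # captured as __key on the preceding odd row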
                    divs = rows[row].find_all('div')[0]
                    sub_div = divs.descendants
                    i = 0
                    for div in sub_div:
                        if i == 5:
                            day_data = get_listed_details(div)
                            h.upd_dic_with_sub_list_ext(
                                __key, day_data, ipo_details)
                        i += 1

                row += 1
    except Exception as err:
        traceback.print_exc()
        print("Exception =", str(err))
    return ipo_details
def mc_get_perf_stk_details(bs):
    comp_details = {}
    try:
        std_data = bs.find('div', {'id': 'mktdet_1'})
        for each_div in std_data.findAll('div', attrs={'class': 'PA7 brdb'}):
            sub_div = each_div.descendants
            __tag_name, __tag_value = None, None
            for cd in sub_div:
                if cd.name == 'div' and cd.get('class', '') == ['FL', 'gL_10', 'UC']:
                    __tag_name = cd.text
                if cd.name == 'div' and cd.get('class', '') == ['FR', 'gD_12']:
                    __tag_value = cd.text
                # print("BFR tag Name = {} and value = {}".format(__tag_name, __tag_value))
                if __tag_name and __tag_value and __tag_name in c.STK_RATIO_CON:
                    __tag_name = c.STK_RATIO_CON[__tag_name]
                    if __tag_name not in ['NAME', 'CATEGORY', 'SUB_CATEGORY']:
                        __tag_value = h.extract_float(__tag_value)
                    # print("AFR tag Name = {} and value = {}".format(__tag_name, __tag_value))
                    comp_details[__tag_name] = __tag_value
                    __tag_name, __tag_value = None, None
        # print("COMP DETAILS =", comp_details)

    except Exception as err:
        print("While parsing PERF DETAILS {}".format(err))
        traceback.print_exc()
    return comp_details
def mny_ctr_shr_frm_url(cmp_name, cmp_url):
    comp_details = {}
    try:
        comp_details['NAME'] = cmp_name
        elements = cmp_url.split("/")
        if len(elements) > 5:
            key = elements[5]
            comp_details['CATEGORY'] = key
        bs = h.parse_url(cmp_url)
        if bs:
            std_data = bs.find('div', {'id': 'mktdet_1'})
            for each_div in std_data.findAll('div', attrs={'class': 'PA7 brdb'}):
                sub_div = each_div.descendants
                __tag_name, __tag_value = None, None
                for cd in sub_div:
                    if cd.name == 'div' and cd.get('class', '') == ['FL', 'gL_10', 'UC']:
                        __tag_name = cd.text
                    if cd.name == 'div' and cd.get('class', '') == ['FR', 'gD_12']:
                        __tag_value = cd.text
                    if __tag_name and __tag_value:
                        comp_details[__tag_name] = __tag_value
                        __tag_name, __tag_value = None, None
            # print("COMP DETAILS =", comp_details)

    except Exception as err:
        # log.error("mny_ctr_shr_frm_url ERROR = ", str(err))
        raise err
    return comp_details
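h.upd_dic_with_sub_list, used in several examples to group results by CATEGORY, is likely little more than a grouped append; a sketch under that assumption:

def upd_dic_with_sub_list(key, record, acc):
    # Hypothetical: file each record under its key, creating the
    # list on first sight
    acc.setdefault(key, []).append(record)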
Example #6
def mny_ctr_shr_frm_url(cmp_name, cmp_url, cat):
    with print_lock:
        # print("\nStarting thread {}".format(threading.current_thread().name))
        try:
            dict_comp = {"NAME": cmp_name}

            bs = h.parse_url(cmp_url)

            if bs:
                print("bs coming")
                std_data = bs.find('div', {'id': 'mktdet_1'})
                for each_div in std_data.findAll('div',
                                                 attrs={'class': 'PA7 brdb'}):
                    sub_div = each_div.descendants
                    __tag_name, __tag_value = None, None
                    for cd in sub_div:
                        if cd.name == 'div' and cd.get(
                                'class', '') == ['FL', 'gL_10', 'UC']:
                            __tag_name = cd.text
                        if cd.name == 'div' and cd.get('class',
                                                       '') == ['FR', 'gD_12']:
                            __tag_value = cd.text
                        if __tag_name and __tag_value:
                            dict_comp[__tag_name] = __tag_value
                            __tag_name, __tag_value = None, None
            return dict_comp

        except Exception as err:
            print("Error is = ", str(err))
            raise err
Example #7
def get_yahoo_fin_urls():
    NO_URL_COUNT = 1500
    yahoo = "https://in.finance.yahoo.com"
    # Build the paginated "most active" listing URLs, 25 rows per page
    url_list = [
        "https://in.finance.yahoo.com/most-active?offset={}&count=25".format(i)
        for i in range(0, NO_URL_COUNT, 25)
    ]
    urls = []
    for url in url_list:
        try:
            bs = h.parse_url(url)
            if bs:
                std_data = bs.find('div', {
                    'class': "Ovx(s)"
                }).find_all("table", {"class": "W(100%)"})[0].find_all('tr')
                for tr in std_data:
                    anchor = tr.find('a', href=True, text=True)
                    if not anchor:
                        continue  # header rows carry no quote link
                    url = (yahoo + anchor['href']).replace('?p', '/history?p')
                    if url not in urls:
                        urls.append(url)
        except Exception as err:
            print("Exception = ", str(err))
    print("No of URLs = ", len(urls))
    return urls
Example #8
def load_stk_ratio():
    # Variables declaration
    start = time.time()
    file_path = os.path.join(commons.get_prop('base-path', 'ratio-input'))
    files = [os.path.join(file_path, fn) for fn in next(os.walk(file_path))[2]]
    all_pages = []
    try:
        for file in files:
            read_lines = h.read_list_from_json_file(file)
            all_pages.extend(read_lines)

        # Total number of links to process
        print("No of urls to process", len(all_pages))
        page_bins = h.chunks(THREAD_COUNT, all_pages)

        # Fixed-size thread pool; THREAD_COUNT workers process the bins
        pool = ThreadPool(processes=THREAD_COUNT)
        for link_array in page_bins:
            pool.apply_async(process_pages,
                             args=(link_array, ),
                             callback=log_result)
        pool.close()
        pool.join()

        for df_frames in result_list:
            try:
                result = pd.concat(df_frames, ignore_index=True)
                if len(result) > 0:
                    df_columns = list(result)
                    table = "STK_PERF_HISTORY"
                    values = "to_date(%s, 'MONYY'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s"
                    constraint = ', '.join(['NAME', 'NSE_CODE', 'STK_YEAR'])
                    # create INSERT INTO table (columns) VALUES('%s',...)
                    insert_stmt = h.create_update_query(
                        table, df_columns, values, constraint)
                    curr, con = db.get_connection()
                    execute_batch(curr, insert_stmt, result.values)
                    con.commit()
                    db.close_connection(con, curr)

            except Exception as err:
                print("Exception while inserting data into table ", str(err))

    except Exception as err:
        print(str(err))
    print("Execution time = {0:.5f}".format(time.time() - start))
def get_shares_details(stock_url, process_cnt):
    # Variables declaration
    failed_data = []
    start = time.time()
    # Get the shares from money control
    page_list = get_list_of_share_links(stock_url)
    page_list = page_list[:10]  # NOTE: limits the run to 10 pages, presumably for testing
    print("Total Process count = {}".format(process_cnt))
    print("Total URL count = {}".format(len(page_list)))

    # Fixed-size process pool; process_cnt workers handle the page links
    pool = multi.Pool(processes=process_cnt)
    results = [pool.apply_async(process_queue, args=(link,)) for link in page_list]
    pool.close()
    pool.join()
    print("Total SUCCESS URL count = {}".format(len(results)))
    # failed_data is never populated in this variant; failures surface as
    # exceptions when ele.get() is called below
    log.warning("Total FAILURE URL Count = {}".format(len(failed_data)))

    final_data = {}
    for ele in results:
        tmp_dict = ele.get()
        key = tmp_dict.get("CATEGORY")
        h.upd_dic_with_sub_list(key, tmp_dict, final_data)
    pd.set_option('display.max_columns', 15)

    for category in final_data:
        cat_up = category.upper()
        print("CATEGORY = {} and count = {}".format(cat_up, len(final_data[category])))
        df = pd.DataFrame(final_data[category])
        df = df.set_index("NAME")
        # Slice it as needed
        sliced_df = df.loc[:, ['MARKET CAP (Rs Cr)', 'EPS (TTM)', 'P/E', 'INDUSTRY P/E', 'BOOK VALUE (Rs)',
                               'FACE VALUE (Rs)', 'DIV YIELD.(%)']]
        sliced_df = sliced_df.apply(pd.to_numeric, errors='ignore')
        sorted_df = sliced_df.sort_values(by=['EPS (TTM)', 'P/E'], ascending=[False, False])
        writer_orig = pd.ExcelWriter(os.path.join(commons.get_prop('base-path', 'output'), cat_up + '_Listings.xlsx'),
                                     engine='xlsxwriter')
        sorted_df.to_excel(writer_orig, index=True, sheet_name='report')
        writer_orig.save()
    print("Execution time = {0:.5f}".format(time.time() - start))
Example #10
def parse_yahoo_stk_hist(url):

    try:
        name = url.split("=")[1].split(".")[0]
        # print("Name = ", name)
        bs = h.parse_url(url)
        if bs:
            table = bs.find('div', {
                'class': "Pb(10px) Ovx(a) W(100%)"
            }).find_all("table", {"class": "W(100%) M(0)"})[0]
            data = [[
                td.string.strip() for td in tr.find_all('td') if td.string
            ] for tr in table.find_all('tr')[2:]][:-1]
            # print(data)
            # data.insert(0, ['STK_DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE', 'ACLOSE', 'VOLUME'])
            # Set pandas options
            pd.set_option('display.max_columns', None)
            pd.set_option('display.expand_frame_repr', False)
            pd.set_option('max_colwidth', 0)
            df = pd.DataFrame(data,
                              columns=[
                                  'STK_DATE', 'OPEN', 'HIGH', 'LOW', 'CLOSE',
                                  'ACLOSE', 'VOLUME'
                              ])
            df = df.assign(NSE_CODE=Series(name, index=df.index))
            df = df.drop(columns='ACLOSE')
            cols = ['OPEN', 'HIGH', 'LOW', 'CLOSE', 'VOLUME']
            df[cols] = df[cols].replace({r'\$': '', ',': ''}, regex=True)
            # Drop a row by condition
            df = df[df['OPEN'].notnull()]
            drop_cols = ['STK_DATE', 'NSE_CODE']
            cols = df.columns.drop(drop_cols)
            df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
            df = df.fillna(0)

            if len(df) > 0:
                df_columns = list(df)
                table = "STK_INFO_HISTORY"
                constraint = ', '.join(['STK_DATE', 'NSE_CODE'])
                values = "to_date(%s, 'DD-MON-YYYY'), %s, %s, %s, %s, %s, %s"
                insert_stmt = create_update_query(table, df_columns, values,
                                                  constraint)
                conn = psycopg2.connect(database="trading",
                                        user="******",
                                        password="******")
                conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
                cursor = conn.cursor()
                execute_batch(cursor, insert_stmt, df.values)
                conn.commit()
                cursor.close()
                conn.close()
    except Exception as err:
        traceback.print_exc()
        print("Exception = ", str(err))
Example #11
def parse_url():
    ratio_elements = {}
    url = "https://www.moneycontrol.com/financials/yesbank/ratiosVI/YB#YB"
    bs = h.parse_url(url)
    if bs:
        std_data = bs.find('div', {
            'class': 'PB10'
        }).find('div', {'class': 'FL gry10'})
        nse_code = (std_data.text.split("|")[1]).split(":")[1].strip()
        print("NSE_CODE", nse_code)
        data = [[[td.string.strip() for td in tr.find_all('td') if td.string]
                 for tr in table.find_all('tr')[2:]]
                for table in bs.find_all("table", {"class": "table4"})[2:]]
        ele_list = data[0]
        ratio_elements['STK_YEAR'] = data[0][0]
        i = 2
        while i < len(ele_list) - 4:
            arr = ele_list[i]
            if len(arr) > 5:
                key = ratio_con.get(arr[0])
                val = arr[1:]
                if key: ratio_elements[key] = val
            i += 1
        print("STK RATIO = ", ratio_elements)
        # Set pandas options
        pd.set_option('display.max_columns', None)
        pd.set_option('display.expand_frame_repr', False)
        pd.set_option('max_colwidth', 0)
        df = pd.DataFrame(ratio_elements)
        df = df.apply(pd.to_numeric, errors='ignore')
        df = df.assign(NSE_CODE=Series(nse_code, index=df.index))

        if len(df) > 0:
            df_columns = list(df)
            table = "STK_PERF_HISTORY"
            values = "to_date(%s, 'MONYY'), %s, %s, %s, %s, %s, %s, %s, %s, %s"
            constraint = ', '.join(['NAME', 'NSE_CODE', 'STK_YEAR'])
            # create INSERT INTO table (columns) VALUES('%s',...)
            insert_stmt = create_update_query(table, df_columns, values,
                                              constraint)
            print("PERF HIST= ", insert_stmt)
            conn = psycopg2.connect(database="trading",
                                    user="******",
                                    password="******")
            conn.set_isolation_level(ISOLATION_LEVEL_AUTOCOMMIT)
            cursor = conn.cursor()
            execute_batch(cursor, insert_stmt, df.values)
            conn.commit()
            cursor.close()
            conn.close()
def mny_ctr_shr_frm_url(cmp_name, cmp_url):
    comp_details = {}
    try:
        bs = h.parse_url(cmp_url)
        if bs:
            base_data = bs.find('div', {'class': 'FL gry10'})
            if base_data:
                bs_txt_arr = base_data.text.split("|")
                bse_code = bs_txt_arr[0].split(":")[1]
                nse_code = bs_txt_arr[1].split(":")[1].strip()
                isin_code = bs_txt_arr[2].split(":")[1].strip()
                sector = bs_txt_arr[3].split(":")[1].strip()
                stk_result = {}
                if nse_code:
                    stk_result = mc_get_day_stk_details(bs, 'content_nse', cmp_url)
                    # print("STK DAY details NSE = ", stk_result)
                if not stk_result and isin_code:
                    stk_result = mc_get_day_stk_details(bs, 'content_bse', cmp_url)
                    # print("STK DAY details BSE  = ", stk_result)
                if stk_result:
                    comp_details = mc_get_perf_stk_details(bs)
                    comp_details['NAME'] = cmp_name
                    category = 'N/A'
                    sub_category = 'N/A'
                    if sector:
                        cat_list = sector.split("-")
                        if len(cat_list) > 1:
                            category = cat_list[0]
                            # join to keep SUB_CATEGORY a string, matching the 'N/A' default
                            sub_category = "-".join(cat_list[1:]).strip()
                        else:
                            category = sector
                    comp_details['CATEGORY'] = category
                    comp_details['SUB_CATEGORY'] = sub_category
                    nse_code = nse_code if nse_code else isin_code
                    comp_details['NSE_CODE'] = nse_code
                    comp_details['URL'] = cmp_url
                    comp_details.update(stk_result)
                    # print("STK complete details = ", comp_details)
                else:
                    print("COMP {} not listed or errored".format(cmp_url))
            else:
                print("COMP {} not listed or errored".format(cmp_url))
    except Exception as err:
        print("CMP URL = {} with error = {}".format(cmp_url, err))
        # raise err
    return comp_details
def mc_get_day_stk_details(bs, id, cmp_url):
    data_dict = {}
    try:
        # Get date, stock current price and traded volume
        if bs.find('div', {'id': id}).find('div', {'class': 'brdb PB5'}):
            bse_data = bs.find('div', {'id': id}).find('div', {'class': 'brdb PB5'}).findAll('div')
            if bse_data:
                year = time.strftime("%Y")
                bse_dt = bse_data[3].text.split(",")[0].strip()
                if bse_dt and len(bse_dt) > 5:
                    data_dict["STK_DATE"] = year + ' ' + bse_dt
                data_dict["CURR_PRICE"] = bse_data[4].text.strip()
                bse_st_vol = h.alpnum_to_num(bse_data[6].text.strip().split("\n")[0])
                data_dict["TRADED_VOLUME"] = bse_st_vol.strip()
        # Get previous and open price of the share
        if bs.find('div', {'id': id}).find('div', {'class': 'brdb PA5'}):
            bse_data = bs.find('div', {'id': id}).find('div', {'class': 'brdb PA5'}).findAll('div')
            stk_prc = 0
            for ele in bse_data:
                if ele.get("class") == ['gD_12', 'PB3']:
                    if stk_prc == 0:
                        data_dict["PREV_PRICE"] = ele.text.strip()
                    elif stk_prc == 1:
                        data_dict["OPEN_PRICE"] = ele.text.strip()
                    stk_prc += 1
        # Get low, high and 52 week prices
        if bs.find('div', {'id': id}).find('div', {'class': "PT10 clearfix"}):
            bse_data = bs.find('div', {'id': id}).find('div', {'class': "PT10 clearfix"}).findAll('div')
            stk_p = 0
            for ele in bse_data:
                if ele.get('class') == ["PB3", "gD_11"]:
                    ele_li = ele.text.strip().split("\n")
                    if stk_p == 0:
                        data_dict["LOW_PRICE"] = ele_li[1]
                        data_dict["HIGH_PRICE"] = ele_li[3]
                    elif stk_p == 1:
                        data_dict["LOWEST_PRICE"] = ele_li[1]
                        data_dict["HIGEST_PRICE"] = ele_li[3]
                    stk_p += 1
        if len(data_dict) != 9:
            # incomplete scrape; discard partial data
            data_dict = {}
    except Exception as err:
        print("While parsing for day stocks = {}".format(err))
    return data_dict
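h.alpnum_to_num, called on the traded-volume text above, presumably normalises strings such as "1.2M" or "3,456" into plain numbers; the suffix table here is an assumption:

def alpnum_to_num(text):
    # Hypothetical: strip thousands separators and expand K/L/M/Cr suffixes
    text = text.replace(",", "").strip()
    multipliers = {"K": 1e3, "L": 1e5, "M": 1e6, "Cr": 1e7}
    for suffix, mult in multipliers.items():
        if text.endswith(suffix):
            return str(int(float(text[:-len(suffix)]) * mult))
    return text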
def get_shrs_from_mnctl(url):
    """
        This function will parse html and return data
    :param url: home page url
    :param shares: dictionary contains result
    :return: dictionary which contains key as company name and value is url
    """
    shares = {}
    try:
        bs = h.parse_url(url)
        if bs:
            table = bs.find("table", {"class": "pcq_tbl MT10"})
            for row in table.findAll("tr"):
                for link in row.findAll("a"):
                    shares[link.get("title")] = link.get('href')
    except Exception as err:
        log.exception("ERROR in get_shr_from_mnctl = {}".format(err))
    return shares
Example #15
def get_listed_ipo(stock_url):
    jobs = []
    spipe_list = []
    # Variables declaration
    start = time.time()
    # Get the IPO performance-tracker pages from chittorgarh
    urls = get_list_of_urls(stock_url)
    # urls = ['http://www.chittorgarh.com/ipo/ipo_perf_tracker.asp', 'http://www.chittorgarh.com/ipo/ipo_perf_tracker.asp?FormIPOPT_Page=2', 'http://www.chittorgarh.com/ipo/ipo_perf_tracker.asp?FormIPOPT_Page=3', 'http://www.chittorgarh.com/ipo/ipo_perf_tracker.asp?FormIPOPT_Page=4']
    print(urls)
    cpdus = multi.cpu_count()
    page_bins = h.chunks(cpdus, urls)
    for cpdu in range(cpdus):
        recv_end, send_end = multi.Pipe(False)
        worker = multi.Process(target=process_page,
                               args=(page_bins[cpdu], send_end))
        worker.daemon = True
        jobs.append(worker)
        spipe_list.append(recv_end)
        worker.start()

    for job in jobs:
        job.join(timeout=10)
    print("All jobs completed......")
    try:
        ipo_size = 0
        result_list = [x.recv() for x in spipe_list]
        curr, con = db.get_connection()
        statement = create_update_query('IPO_STK_DETAILS')
        for results in result_list:
            for data in results:
                values = [data[k] for k in data]
                ipo_size += len(values)
                df = get_data_frame(values)
                records = df.to_dict(orient='records')
                print(records)
                execute_batch(curr, statement, df.values)
                con.commit()
        db.close_connection(con, curr)
        print("IPOs listed so far = {}".format(ipo_size))
    except Exception as err:
        traceback.print_exc()
        print(str(err))
    print("Execution time = {0:.5f}".format(time.time() - start))
Example #16
def get_stk_history():
    jobs = []
    # Variables declaration
    start = time.time()
    # Get the shares from yahoo
    urls = get_yahoo_fin_urls()
    # urls = ["https://in.finance.yahoo.com/quote/3IINFOTECH.NS/history?p=3IINFOTECH.NS", "https://in.finance.yahoo.com/quote/3IINFOTECH.NS/history?p=3IINFOTECH.NS", "https://in.finance.yahoo.com/quote/3IINFOTECH.NS/history?p=3IINFOTECH.NS", "https://in.finance.yahoo.com/quote/3IINFOTECH.NS/history?p=3IINFOTECH.NS"]
    print("No of URLs = ", len(urls))
    cpdus = multi.cpu_count()
    page_bins = h.chunks(cpdus, urls)
    for chunk in page_bins:
        worker = multi.Process(target=process_page, args=(chunk,))
        worker.daemon = True
        jobs.append(worker)
        worker.start()

    for job in jobs:
        job.join(timeout=10)
    print("All jobs completed......")
    print("Execution time = {0:.5f}".format(time.time() - start))
Example #17
def get_ipo_day_info(link):
    ipo_details = []
    try:
        bs = h.parse_url(link)
        print("URL DAY=", link)
        if bs:
            divs = bs.find("div", {
                'class': 'col-lg-12 col-md-12 col-sm-12 main'
            }).findAll('div')
            nse = divs[2].text.split(":")[1].strip()
            face_val = divs[3].text.split(":")[1].strip()
            isin = divs[6].text.split(":")[1].strip()
            if nse:
                ipo_details.append(nse)
            else:
                ipo_details.append(isin)
            ipo_details.append(face_val)
            table = bs.find(
                "table", {
                    "class":
                    "table table-condensed table-bordered table-striped table-nonfluid"
                })
            rows = table.findAll("tr")
            # curr_price = rows[1].findAll("td")[0].find('span').text.strip()
            open_price = rows[2].findAll("td")[1].text.strip()
            hl = rows[3].findAll("td")[1].text.split("-")
            high, low = hl[0].strip(), hl[1].strip()
            prev_price = rows[4].findAll("td")[1].text.strip()
            turn_over = rows[8].findAll("td")[1].text.strip()
            # ipo_details.append(curr_price)
            ipo_details.append(open_price)
            ipo_details.append(high)
            ipo_details.append(low)
            ipo_details.append(turn_over)
            ipo_details.append(prev_price)
            # 7 fields are collected (curr_price is commented out above)
            if len(ipo_details) == 7:
                return ipo_details
    except Exception as err:
        traceback.print_exc()
        print("Exception =", str(err))
    return ipo_details
Example #18
def get_nse_code(url):
    nse_code, isin_code = None, None
    try:
        bs = h.parse_url(url)
        if bs:
            divs = bs.findAll("div", {'class': "panel panel-default"})[2].find(
                'div', {'class': 'panel-body'})
            sub_div = divs.descendants
            for div in sub_div:
                if div.name == "li":
                    value = div.text
                    if "NSE Symbol:" in value:
                        nse_code = value.split(":")[1].strip()
                    elif "ISIN:" in value:
                        isin_code = value.split(":")[1].strip()
            if not nse_code:
                nse_code = isin_code

    except Exception as err:
        print("While parsing for NSE code", str(err))
    return nse_code
def get_shares_details(all_pages, first_time_process):
    # Variables declaration
    jobs = []
    spipe_list = []
    failed_que = multi.Queue()
    start = time.time()
    cpdus = multi.cpu_count()
    print("Total Process count = {}".format(cpdus))
    print("Total URL count = {}".format(len(all_pages)))
    page_bins = chunks(cpdus, all_pages)
    for cpdu in range(cpdus):
        recv_end, send_end = multi.Pipe(False)
        worker = multi.Process(target=process_page, args=(page_bins[cpdu], send_end, failed_que,))
        worker.daemon = True
        jobs.append(worker)
        spipe_list.append(recv_end)
        worker.start()

    # end_at = time.time() + (5)
    # while jobs:
    #     job = jobs.pop()
    #     delta = end_at - time.time()
    #     if delta > 0:
    #         job.join(timeout=delta)
    #     job.terminate()
    #     job.join()
    for job in jobs:
        job.join(timeout=10)
    print("All jobs completed......")

    # if first_time_process:
    #     result_list = [x.recv() for x in spipe_list]
    #     failed_pages = []
    #     while not failed_que.empty():
    #         failed_pages.append(failed_que.get())
    #     print("Parsing failed page count = {}".format(len(failed_pages)))
    #     get_shares_details(failed_pages, False)
    try:
        result_list = [x.recv() for x in spipe_list]
        final_data = {}
        ratio_links = []
        print("FAILED URL COUNT = {}".format(failed_que.qsize()))
        for results in result_list:
            print("size of the results array from result_list = ", len(results))
            for tmp_dict in results:
                key = tmp_dict.get("CATEGORY")
                link = tmp_dict.get("URL")
                ratio_links.append(define_mc_ratio_link(link))
                h.upd_dic_with_sub_list(key, tmp_dict, final_data)
        if ratio_links:
            print("Size of the RATIO array = ", len(ratio_links))
            h.write_list_to_json_file(os.path.join(
                commons.get_prop('base-path', 'output'), "5yrs_stk_ratio.txt"), ratio_links)

        # Set pandas options
        pd.set_option('display.max_columns', None)
        pd.set_option('display.expand_frame_repr', False)
        pd.set_option('max_colwidth', 0)
        for category in final_data:
            df = pd.DataFrame(final_data[category])
            cols = df.columns.drop(['STK_DATE', 'NSE_CODE', 'NAME', 'CATEGORY', 'SUB_CATEGORY', 'URL'])
            df[cols] = df[cols].apply(pd.to_numeric, errors='coerce')
            df = df.fillna(0)
            # print(df)
            if len(df) > 0:
                try:
                    df_columns = list(df)
                    table = "STK_DETAILS"
                    columns = ",".join(df_columns)
                    print("Batch started with count {} to insert into DB = ", len(df.values))
                    values = "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \
                             "%s, %s, %s, %s, %s, %s, to_date(%s, 'YYYMONDD'), %s, %s, %s);"
                    # create INSERT INTO table (columns) VALUES('%s',...)
                    insert_stmt = "INSERT INTO {} ({}) VALUES {}".format(table, columns, values)
                    curr, con = db.get_connection()
                    execute_batch(curr, insert_stmt, df.values)
                    con.commit()
                    db.close_connection(con, curr)
                    print("Batch inserted into DB successfully")

                except Exception as err:
                    print("While inserting data into DB exception = {}".format(err))

    except Exception as err:
        print("Exception in get_share_details function = {}".format(err))

    print("Execution time = {0:.5f}".format(time.time() - start))
Example #20
def get_ratios_from_mc(url):
    # url = "https://www.moneycontrol.com/financials/abhinavleasingfinance/ratiosVI/ALF03#ALF03"
    data_frame = None
    try:
        bs = h.parse_url(url)
        if bs:
            cmp_name = None
            title_data = bs.find('div', {
                'id': 'nChrtPrc'
            }).find('h1', {'class': 'b_42 PT20'})
            if title_data:
                cmp_name = title_data.text.strip()
            std_data = bs.find('div', {
                'class': 'PB10'
            }).find('div', {'class': 'FL gry10'})
            if std_data:
                header_parts = std_data.text.split("|")
                nse_code, isin, sector = None, None, None
                for part in header_parts:
                    name, val = part.split(":")[0].strip(), part.split(
                        ":")[1].strip()
                    if name == 'ISIN': isin = val
                    if name == 'NSE': nse_code = val
                    if name == 'SECTOR': sector = val
                # print("nse code = {}, isin = {}, sector = {}".format(nse_code, isin, sector))
                if not nse_code: nse_code = isin

                data = [[
                    [
                        td.string.strip() for td in tr.find_all('td')
                        if td.string
                    ] for tr in table.find_all('tr')[2:]
                ] for table in bs.find_all("table", {"class": "table4"})[2:]]

                if data:
                    STK_RATIO_ELEMENTS = {}
                    ele_list = data[0]
                    STK_RATIO_ELEMENTS['STK_YEAR'] = data[0][0]
                    ini_stk_ratio_dic(STK_RATIO_ELEMENTS,
                                      len(STK_RATIO_ELEMENTS['STK_YEAR']))
                    i = 2
                    while i < len(ele_list) - 4:
                        arr = ele_list[i]
                        if len(arr) > 5:
                            key = c.STK_RATIO_CON.get(arr[0])
                            val = arr[1:]
                            if key: STK_RATIO_ELEMENTS[key] = val
                        i += 1
                    print("STK RATIO = {} of Processing URL = {}".format(
                        STK_RATIO_ELEMENTS, url))
                    data_frame = get_data_frame(nse_code, sector, cmp_name,
                                                STK_RATIO_ELEMENTS)
                    data_frame.drop_duplicates(subset=['STK_YEAR', 'NSE_CODE'],
                                               inplace=True)
                else:
                    print("Key ratios are not listed to {}".format(url))

    except Exception as err:
        print("Error while parsing URL in get_ratio function = ", str(err))

    return data_frame
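ini_stk_ratio_dic above apparently zero-fills the expected ratio columns so the eventual DataFrame stays rectangular even when a ratio is missing from the page; a sketch under that assumption:

def ini_stk_ratio_dic(ratio_dict, size):
    # Hypothetical: pre-seed every numeric ratio column with zeros of
    # the same length as STK_YEAR so the columns align
    for key in c.STK_RATIO_CON.values():
        if key not in ('NAME', 'CATEGORY', 'SUB_CATEGORY'):
            ratio_dict.setdefault(key, [0] * size)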