Example #1
def main():
    logger.info("Combining all the data from external sources together")
    dt = DataTransformation()
    try:
        dt.us()
        dt.jpx()
        dt.cn()
        dt.euronext()
        dt.aastocks()
        dt.lse()
        dt.ca()
        dt.frankfurt()
        dt.krx()
        dt.asx()
        dt.twse()
        dt.bme()
        dt.sgx()
        dt.idx()
        dt.bm()
        dt.nasdaqnordic()
        dt.spotlight()
        dt.italy()
    except Exception as e:
        logger.error(e, exc_info=sys.exc_info())
        error_email(str(e))
    finally:
        dt.formatting_all()
        dt.save_all()
Example #2
def main():
    wd = WebDriver()
    wd.random_wait()
    logger.info("Gathering data from sources")
    for k, v in wd.sources_dict.items():
        try:
            wd.load_url(v.get('url'), sleep_after=True)
            df = wd.parse_table(**v)
            if df is not None:
                s_file = os.path.join(wd.source_data_folder,
                                      v.get('file') + '.csv')
                if os.path.exists(s_file):
                    df = wd.update_existing_data(pd.read_csv(s_file),
                                                 df,
                                                 exclude_col='time_checked')
                df.sort_values(by='time_checked',
                               ascending=False,
                               inplace=True)
                df.to_csv(s_file, index=False, encoding='utf-8-sig')
                wd.webscraping_results.append([wd.time_checked_str, k, 1])
        except Exception as e:
            logger.error(f"ERROR for {k}")
            logger.error(e, exc_info=sys.exc_info())
            logger.info('-' * 100)
            error_screenshot_file = f"{k} Error {wd.time_checked.strftime('%Y-%m-%d %H%M')}.png"
            wd.driver.save_screenshot(
                os.path.join(log_folder, 'Screenshots', error_screenshot_file))
            wd.webscraping_results.append([wd.time_checked_str, k, 0])
    wd.asx()
    wd.tkipo()
    wd.close_driver()
    wd.av_api()
    wd.save_webscraping_results()
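The loop in this example is driven entirely by wd.sources_dict, where each value must carry at least a 'url' and a 'file' key and any remaining keys are forwarded to parse_table via **v. A minimal sketch of one entry, with the URL, file name, and extra keys all being assumptions:

# Hypothetical sources_dict entry; only the 'url' and 'file' keys are implied by the
# loop above, every concrete value here is made up.
sources_dict = {
    'NASDAQ': {
        'url': 'https://www.nasdaq.com/market-activity/ipos',  # page loaded by wd.load_url
        'file': 'NASDAQ',  # CSV name written under wd.source_data_folder
        # any other keys are passed straight through to wd.parse_table(**v)
    },
}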
Example #3
    def update_withdrawn_ipos(self):
        """
        If an IPO is withdrawn, the RPD will be updated with a comment showing that the status is withdrawn,
        and in the main data frame the RPD Status will be set to Resolved (so that I no longer update the RPD).

        :return:
        """
        df_wd = pd.merge(self.df_wd,
                         self.df_rpd,
                         how='inner',
                         on='formatted company name',
                         suffixes=('', '_'))
        if len(df_wd) > 0:
            df_wd['IPO Date'] = df_wd['IPO Date'].dt.strftime('%Y-%m-%d')
            logger.info(
                f"{len(df_wd)} RPDs to update for withdrwan IPOs: {', '.join([str(int(num)) for num in df_wd['RPD Number'].to_list()])}"
            )
            df_wd.replace(np.nan, '', inplace=True)
            for idx, row in df_wd.iterrows():
                rpd = int(row['RPD Number'])
                ipo_html = row[self.rpd_cols].to_frame().to_html(
                    header=False, na_rep='', justify='left')
                comment_endpoint = self.base_url + f'rpd/{int(rpd)}/comments'
                rpd_comment = {'Content': ipo_html}
                res_c = self.session.post(comment_endpoint,
                                          data=json.dumps(rpd_comment),
                                          headers=self.headers)
                self.df.loc[self.df['RPD Number'] == rpd,
                            'Status'] = 'Withdrawn'
                self.df.loc[self.df['RPD Number'] == rpd,
                            'RPD Status'] = 'Resolved'
Example #4
 def create_csv(self, recheck_all: bool = False):
     # create a dataframe of all company names without iconums including new names found
     df_e = pd.read_excel(
         self.entity_mapping_file,
         usecols=['Company Name', 'iconum', 'entity_id', 'mapStatus'])
     df_s = pd.read_excel(os.path.join(os.getcwd(), 'Results',
                                       'All IPOs.xlsx'),
                          usecols=['Company Name', 'Symbol', 'Market'])
     df = pd.merge(df_s, df_e, how='outer', on='Company Name')
     if recheck_all:
         # checking 1. company names that aren't null 2. don't have an iconum
         df = df.loc[~df['Company Name'].isna() & df['iconum'].isna()]
     else:
         # only checking 1. company names that aren't null 2. don't have an iconum and 3. haven't been checked yet
         df = df.loc[~df['Company Name'].isna() & df['iconum'].isna()
                     & df['mapStatus'].isna()]
     df = df.drop_duplicates()
     logger.info(f"{len(df)} unmapped entities")
     # making unique client_id by concatenating company name, symbol and market separated by underscores
     df['client_id'] = df['Company Name'].fillna('') + '_' + df[
         'Symbol'].fillna('').astype(str) + '_' + df['Market'].fillna('')
     df.set_index('client_id', inplace=True)
     # save that dataframe to a csv encoded as utf8
     if len(df) > 1:
         df.to_csv(self.file, index_label='client_id', encoding='utf-8-sig')
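As a quick illustration of the client_id convention described in the comment above, using made-up values (missing fields simply become empty strings in the real code):

# Made-up row showing the client_id format: company name, symbol and market joined by underscores.
name, symbol, market = 'Acme Corp', 'ACME', 'NASDAQ'
client_id = f"{name}_{symbol}_{market}"  # -> 'Acme Corp_ACME_NASDAQ'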
Example #5
 def get_task_status(self,
                     eid,
                     recheck_count: int = 0,
                     max_recheck=12,
                     wait_time=10):
     # get the status of the request
     entity_task_status_endpoint = 'https://api.factset.com/content/factset-concordance/v1/entity-task-status'
     status_parameters = {'taskId': str(eid)}
     entity_task_status_response = requests.get(
         url=entity_task_status_endpoint,
         params=status_parameters,
         auth=self.authorization,
         headers=self.headers,
         verify=False)
     entity_task_status_data = json.loads(entity_task_status_response.text)
     task_status = entity_task_status_data['data'][0]['status']
     if task_status in ['PENDING', 'IN-PROGRESS'
                        ] and recheck_count < max_recheck:
         recheck_count += 1
         sleep(wait_time)
         return self.get_task_status(eid, recheck_count, max_recheck, wait_time)
     else:
         logger.info(
             f"Duration for Concordance API {entity_task_status_data['data'][0]['processDuration']}"
         )
         logger.info(
             f"Decision Rate for Concordance API {entity_task_status_data['data'][0]['decisionRate']}"
         )
         return task_status
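With the defaults above, a PENDING or IN-PROGRESS task is rechecked at most 12 times, 10 seconds apart, so roughly two minutes in total. A hypothetical call with a larger polling budget, assuming the method lives on the EntityMatchBulk class from Example #7 and using a made-up task id:

# Poll a Concordance task for up to ~10 minutes instead of the default ~2 minutes.
em = EntityMatchBulk()
status = em.get_task_status('1234567', max_recheck=30, wait_time=20)
if status != 'SUCCESS':
    logger.warning(f"Concordance task did not finish: {status}")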
Example #6
def main():
    logger.info("Comparing external data with data collected internally")
    dc = DataComparison()
    try:
        dc.concatenate_ticker_exchange()
        dc.file_for_rpds()
        return dc.compare()
    except Exception as e:
        logger.error(e, exc_info=sys.exc_info())
        error_email(str(e))
Example #7
def main():
    logger.info("Checking Cordance API for entity IDs")
    em = EntityMatchBulk()
    try:
        em.create_csv()
        em.entity_mapping_api()
    except Exception as e:
        logger.error(e, exc_info=sys.exc_info())
        logger.info('-' * 100)
        error_email(str(e))
Example #8
def delete_old_files(folder: str, num_days: int = 30) -> list:
    """
    Deletes files older than the number of days given as a parameter. Defaults to deleting files more than 30 days old.
    :param folder: folder location files will be deleted from
    :param num_days: int specifying the number of days before a file is deleted
    :return: list of files that were deleted
    """
    old_date = datetime.now() - timedelta(days=num_days)  # local time, to match the local timestamps from os.path.getmtime below
    files_deleted = []
    for root, dirs, files in os.walk(folder):
        for file in files:
            f_abs = os.path.join(root, file)
            f_modified = datetime.fromtimestamp(os.path.getmtime(f_abs))
            if f_modified <= old_date:
                os.unlink(f_abs)
                files_deleted.append(file)
    if len(files_deleted) > 0:
        logger.info(f"Deleted {', '.join(files_deleted)}")
    return files_deleted
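A hypothetical call, reusing the log_folder/'Screenshots' location that the scraper error handlers write to (that this is where the function gets pointed is an assumption):

# Prune error screenshots that have not been modified in the last 60 days.
removed = delete_old_files(os.path.join(log_folder, 'Screenshots'), num_days=60)
logger.info(f"{len(removed)} old screenshot(s) removed")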
Example #9
 def av_api(self):
     try:
         requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
         parameters = {
             'function': self.config.get('AV', 'funct'),
             'apikey': self.config.get('AV', 'apikey')  # config option holding the API key (option name assumed)
         }
         r = requests.get(self.config.get('AV', 'base_url'),
                          params=parameters,
                          verify=False)
         cal = [[cell.replace('\r', '') for cell in row.split(',')]
                for row in r.text.split('\n')]
         df = pd.DataFrame(cal)
         df.columns = df.loc[0]
         df = df.drop(0).reset_index(drop=True)
         df = df.dropna()
         df.loc[df['name'].str.contains(r' Warrant'),
                'assetType'] = 'Warrants'
         df.loc[df['name'].str.contains(r' Right'), 'assetType'] = 'Rights'
         df.loc[df['name'].str.contains(r' Unit'), 'assetType'] = 'Units'
         df['assetType'].fillna('Shares', inplace=True)
         for c in ['priceRangeLow', 'priceRangeHigh']:
             df[c] = pd.to_numeric(df[c], errors='coerce')
         df['time_checked'] = self.time_checked_str
         df.sort_values(by=['ipoDate', 'name'], inplace=True)
         s_file = os.path.join(self.source_data_folder,
                               self.config.get('AV', 'file_name') + '.csv')
         if os.path.exists(s_file):
             df = self.update_existing_data(pd.read_csv(s_file),
                                            df,
                                            exclude_col='time_checked')
         df.sort_values(by='time_checked', ascending=False, inplace=True)
         df.to_csv(s_file, index=False, encoding='utf-8-sig')
         self.webscraping_results.append(
             [self.time_checked_str,
              self.config.get('AV', 'file_name'), 1])
     except Exception as e:
         logger.error(f"ERROR for AV")
         logger.error(e, exc_info=sys.exc_info())
         logger.info('-' * 100)
         self.webscraping_results.append([self.time_checked_str, 'AV', 0])
Example #10
 def asx(self):
     try:
         self.driver.get(
             'https://www2.asx.com.au/listings/upcoming-floats-and-listings'
         )
         soup = self.return_soup()
         listing_info = [
             co.text.strip()
             for co in soup.find_all('span',
                                     attrs={'class': 'gtm-accordion'})
         ]
         df = pd.DataFrame(listing_info)
         df.columns = ['listing_info']
         df['Company Name'] = df['listing_info'].str.extract(
             r'^([a-zA-Z0-9\s,\.&]*)\s\-')
         df['IPO Date'] = df['listing_info'].str.extract(
             r'\s*-\s*(\d{1,2}\s\w*\s\d{2,4})')
         df['IPO Date'] = pd.to_datetime(df['IPO Date'],
                                         errors='coerce').dt.date
         df['Market'] = 'Australian Stock Exchange'
         df['time_checked'] = self.time_checked_str
         if df is not None:
             s_file = os.path.join(self.source_data_folder, 'ASX.csv')
             if os.path.exists(s_file):
                 df = self.update_existing_data(pd.read_csv(s_file),
                                                df,
                                                exclude_col='time_checked')
             df.sort_values(by='time_checked',
                            ascending=False,
                            inplace=True)
             df.to_csv(s_file, index=False, encoding='utf-8-sig')
             self.webscraping_results.append(
                 [self.time_checked_str, 'ASX', 1])
     except Exception as e:
         logger.error(f"ERROR for ASX")
         logger.error(e, exc_info=sys.exc_info())
         logger.info('-' * 100)
         error_screenshot_file = f"ASX Error {self.time_checked.strftime('%Y-%m-%d %H%M')}.png"
         self.driver.save_screenshot(
             os.path.join(log_folder, 'Screenshots', error_screenshot_file))
         self.webscraping_results.append([self.time_checked_str, 'ASX', 0])
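The two str.extract calls above split each accordion entry into a company name and a listing date; a small sketch on a made-up ASX listing string:

# Made-up listing text in the 'Company - d Month yyyy' shape the regexes expect.
s = pd.Series(['Acme Mining Ltd - 15 March 2024'])
s.str.extract(r'^([a-zA-Z0-9\s,\.&]*)\s\-')        # captures 'Acme Mining Ltd'
s.str.extract(r'\s*-\s*(\d{1,2}\s\w*\s\d{2,4})')   # captures '15 March 2024'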
Example #11
def email_report(attach_file=None, addtl_message: str = ''):
    """
    Emails the report as an attachment.
    Email details such as the sender and recipients are provided in the .ini file, which is read by configparser.
    :param attach_file: path of a file or list of files which will be attached to the email
    :param addtl_message: optional string that can be added to body of email
    :return: None
    """
    outlook = win32.Dispatch('outlook.application')
    mail = outlook.CreateItem(0)
    mail.To = config.get('Email', 'To')
    mail.Sender = config.get('Email', 'Sender')
    mail.Subject = f"{config.get('Email', 'Subject')} {today_date}"
    mail.HTMLBody = config.get('Email', 'Body') + addtl_message + config.get('Email', 'Signature')
    if isinstance(attach_file, str) and os.path.exists(attach_file):
        mail.Attachments.Add(attach_file)
    elif isinstance(attach_file, list):
        for f in attach_file:
            mail.Attachments.Add(f)
    mail.Send()
    logger.info('Email sent')
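A hypothetical call, with an assumed attachment path (reusing the Results workbook from Example #4) and an extra HTML note:

# attach_file also accepts a single path as a string instead of a list.
email_report(attach_file=[os.path.join(os.getcwd(), 'Results', 'All IPOs.xlsx')],
             addtl_message='<p>2 sources failed during the latest run.</p>')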
Example #12
 def update_existing_data(old_df: pd.DataFrame,
                          new_df: pd.DataFrame,
                          exclude_col=None) -> pd.DataFrame:
     """
     If there is already existing data, this function can be called to remove any duplicates.
     :param old_df: DataFrame with existing data
     :param new_df: DataFrame with new data
     :param exclude_col: Column(s) that will be excluded when removing duplicate values in DataFrames.
                         Can be given either as a list of columns or a string with the column name.
     :return: DataFrame
     """
     try:
         df = pd.concat([old_df, new_df.astype(old_df.dtypes)],
                        ignore_index=True,
                        sort=False)
     except KeyError as ke:
         logger.error(ke)
         logger.info(f"Existing df columns: {', '.join(old_df.columns)}")
         logger.info(f"New df columns: {', '.join(new_df.columns)}")
     except ValueError as ve:
         logger.error(ve)
         logger.info(
             f"Existing df data types: \n{old_df.dtypes.to_string(na_rep='')}"
         )
         logger.info(
             f"New df data types: \n{new_df.dtypes.to_string(na_rep='')}")
         df = pd.concat([old_df, new_df], ignore_index=True, sort=False)
     if exclude_col and isinstance(exclude_col, str):
         ss = [col for col in df.columns.to_list() if col != exclude_col]
     elif exclude_col and isinstance(exclude_col, list):
         ss = [
             col for col in df.columns.to_list() if col not in exclude_col
         ]
     else:
         ss = df.columns.to_list()
     # I want to preserve when this item was first added to the website and have most recent updates at the top so
     # sorting by most recent time_checked, dropping duplicates for subset of columns and keeping the last (earliest)
     if 'time_checked' in df.columns:
         df.sort_values(by='time_checked', ascending=False, inplace=True)
     df.drop_duplicates(subset=ss, keep='last', inplace=True)
     return df
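A small demonstration of the de-duplication rule with made-up rows, calling the function directly (in the scrapers it is invoked as a method); the repeated listing keeps its earliest time_checked so the first sighting is preserved:

# Two scrapes of the same listing where only time_checked differs.
old = pd.DataFrame({'Company Name': ['Acme Corp'], 'Symbol': ['ACME'],
                    'time_checked': ['2024-01-01 09:00']})
new = pd.DataFrame({'Company Name': ['Acme Corp'], 'Symbol': ['ACME'],
                    'time_checked': ['2024-01-02 09:00']})
df = update_existing_data(old, new, exclude_col='time_checked')
# df ends up with a single Acme Corp row whose time_checked is '2024-01-01 09:00'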
Example #13
 def entity_mapping_api(self):
     if os.path.exists(self.file):
         # create request with concordance API
         entity_task_endpoint = 'https://api.factset.com/content/factset-concordance/v1/entity-task'
         entity_task_request = {
             'taskName': self.file_name,
             'clientIdColumn': 'client_id',
             'nameColumn': 'Company Name',
             'includeEntityType': ['PUB', 'PVT', 'HOL', 'SUB'],
             'uniqueMatch': True
         }
         with open(self.file, 'rb') as f:
             file_data = {
                 'inputFile': (self.file_name + '.csv', f, 'text/csv')
             }
             entity_task_response = requests.post(url=entity_task_endpoint,
                                                  data=entity_task_request,
                                                  auth=self.authorization,
                                                  files=file_data,
                                                  headers=self.headers)
         assert entity_task_response.ok, f"{entity_task_response.status_code} - {entity_task_response.text}"
         # temporarily saving entity task response to look into errors
         # getting Bad Request - Number of elements in the header doesn't match the total number of columns
         with open(os.path.join(log_folder, 'Concordance API Responses',
                                f"API response for {self.file_name}.txt"),
                   'w',
                   encoding='utf8') as f:
             json.dump(entity_task_response.text, f, ensure_ascii=False)
         if entity_task_response.text is not None and entity_task_response.text != '':
             entity_task_data = json.loads(entity_task_response.text)
             eid = entity_task_data['data']['taskId']
             task_name = entity_task_data['data'][
                 'taskName']  # will be file_name provided in entity task request
             logger.info(
                 f"Entity mapping request submitted - task ID {eid} - task name {task_name}"
             )
             task_status = self.get_task_status(eid)
             logger.info(f"Task {eid} status - {task_status}")
             if task_status == 'SUCCESS':
                 df_result = self.get_entity_decisions(eid)
                 self.formatting_and_saving(df_result)
     else:
         logger.info(f"File not found - {self.file}")
Example #14
 def tkipo(self):
     try:
         self.driver.get(
             'http://www.tokyoipo.com/top/iposche/index.php?j_e=E')
         soup = self.return_soup()
         table = soup.find('table', attrs={'class': 'iposchedulelist'})
         table_data = []
         row = []
         for r in table.find_all('tr'):
             for cell in r.find_all('td'):
                 cell_text = cell.text.strip()
                 if '\n\n▶\xa0Stock/Chart' in cell_text:
                     table_data.append(row)
                     row = [cell_text.replace('\n\n▶\xa0Stock/Chart', '')]
                 else:
                     row.append(cell_text)
         table_data.append(row)
         df = pd.DataFrame(table_data)
         df.columns = [
             'Company Name', 'IPO Date', 'Symbol', 'Listed Shares',
             'Blank_0', 'Price Range', 'Price', 'Book Building Period',
             'Opening Price', 'Change', 'Lead Underwriter',
             'Business Description', 'Blank_1'
         ]
         df.replace('', np.nan, inplace=True)
         df.dropna(how='all', inplace=True)
         df.drop(columns=['Blank_0', 'Business Description', 'Blank_1'],
                 inplace=True,
                 errors='ignore')
         df['Company Name'] = df['Company Name'].str.strip()
         df['Price Range Expected Date'] = df['Price Range'].str.extract(
             r'^(\d{0,2}\/\d{0,2})$')
         df['Price Expected Date'] = df['Price'].str.extract(
             r'^(\d{0,2}\/\d{0,2})$')
         df['Price'] = pd.to_numeric(df['Price'].str.replace(',', ''),
                                     errors='coerce')
         # date is provided as mm/dd, adding current year to make the date formatted as mm/dd/yyyy
         df['IPO Date'] = df['IPO Date'] + f"/{datetime.now().year}"
         df['IPO Date'] = pd.to_datetime(df['IPO Date'],
                                         errors='coerce').dt.date
         # at the beginning of the year, the calendar will still show IPOs from last year
         # adding the current year to that previous date will be incorrect
         # those incorrect dates will be 6+ months away, we shouldn't see legitimate IPO dates that far in advance
         # if the IPO date is more than 6 months away, I subtract 1 year from the IPO date
         df.loc[df['IPO Date'] >
                (pd.to_datetime('today') + pd.offsets.DateOffset(months=6)),
                'IPO Date'] = df['IPO Date'] - pd.offsets.DateOffset(
                    years=1)
         df['Market'] = 'Japan Stock Exchange' + ' - ' + df[
             'Symbol'].str.extract(r'\((\w*)\)')
         df['Symbol'] = df['Symbol'].str.replace(r'(\(\w*\))',
                                                 '',
                                                 regex=True)
         df['time_checked'] = self.time_checked_str
         if df is not None:
             s_file = os.path.join(self.source_data_folder, 'TokyoIPO.csv')
             if os.path.exists(s_file):
                 df = self.update_existing_data(pd.read_csv(s_file),
                                                df,
                                                exclude_col='time_checked')
             df.sort_values(by='time_checked',
                            ascending=False,
                            inplace=True)
             df.to_csv(s_file, index=False, encoding='utf-8-sig')
             self.webscraping_results.append(
                 [self.time_checked_str, 'TokyoIPO', 1])
     except Exception as e:
         logger.error(f"ERROR for TokyoIPO")
         logger.error(e, exc_info=sys.exc_info())
         logger.info('-' * 100)
         error_screenshot_file = f"TokyoIPO Error {self.time_checked.strftime('%Y-%m-%d %H%M')}.png"
         self.driver.save_screenshot(
             os.path.join(log_folder, 'Screenshots', error_screenshot_file))
         self.webscraping_results.append(
             [self.time_checked_str, 'TokyoIPO', 0])
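The year roll-back rule spelled out in the comments above, isolated as a standalone sketch on made-up mm/dd values:

# Dates that land more than six months ahead after the current year is appended get pushed back a year.
raw = pd.Series(['01/15', '12/20'])
dates = pd.to_datetime(raw + f"/{datetime.now().year}", errors='coerce')
cutoff = pd.to_datetime('today') + pd.offsets.DateOffset(months=6)
dates.loc[dates > cutoff] = dates - pd.offsets.DateOffset(years=1)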
Example #15
    def create_new_rpds(self) -> dict:
        """
        Creates new RPDs for all the IPOs that currently do not have an RPD.

        :return: Dictionary with data about the new RPDs created
        """
        endpoint = self.base_url + 'rpd'
        rpd_dict = defaultdict(list)
        df_rpd = self.df.copy()
        # filtering for only IPOs that do not have an RPD Number
        df_rpd = df_rpd.loc[df_rpd['RPD Number'].isna()]
        df_rpd['IPO Date'] = df_rpd['IPO Date'].dt.strftime('%Y-%m-%d')
        df_rpd.replace(np.nan, '', inplace=True)
        for idx, row in df_rpd.iterrows():
            company_name = str(row['Company Name'])
            exchange = str(row['Market'])
            fds_cusip = str(row['CUSIP'])
            ipo_date = str(row['IPO Date'])
            ticker = str(row['Symbol'])
            ipo_html = row[self.rpd_cols].to_frame().to_html(header=False,
                                                             na_rep='',
                                                             justify='left')
            rpd_request = {
                'Title':
                f"{company_name} - {exchange}",
                'Products': [{
                    'Id': '106317'
                }],
                'Content':
                ipo_html,
                'Type':
                'EnhancementRequest',
                'Priority':
                'Medium',
                'Severity':
                'Medium',
                'Questions': [{
                    'Id': 31407,
                    'Answers': [{
                        'AnswerValue': fds_cusip
                    }]
                }, {
                    'Id': 31405,
                    'Answers': [{
                        'AnswerValue': ipo_date
                    }]
                }, {
                    'Id': 31406,
                    'Answers': [{
                        'AnswerValue': exchange
                    }]
                }, {
                    'Id': 31408,
                    'Answers': [{
                        'AnswerValue': ticker
                    }]
                }]
            }
            res = self.session.post(url=endpoint,
                                    data=json.dumps(rpd_request),
                                    headers=self.headers)
            if res.ok:
                rpd_num = res.headers['X-IS-ID']
                rpd_date = res.headers['Date']
                # rpd_api_link = res.headers['Location']
                rpd_dict['Company Name'].append(company_name)
                rpd_dict['RPD Number'].append(rpd_num)
                rpd_dict['RPD Link'].append(
                    'https://is.factset.com/rpd/summary.aspx?messageId=' +
                    str(rpd_num))
                rpd_dict['RPD Creation Date'].append(rpd_date)
                rpd_dict['RPD Status'].append('Pending')
            sleep(1)
        logger.info(
            f"Created {len(rpd_dict['RPD Number'])} new RPDs: {', '.join([str(num) for num in rpd_dict['RPD Number']])}"
        )
        return rpd_dict
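Since the returned defaultdict holds parallel lists, one straightforward (assumed) way for the caller to consume it is to turn it into a DataFrame before folding the new RPD numbers back into the main data:

# Assumption about how the return value is consumed downstream.
df_new_rpds = pd.DataFrame(self.create_new_rpds())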
Example #16
    def update_rpds(self):
        """
        Updating existing RPDs when the IPO Date, CUSIP, or Symbol has changed.
        The data is merged so that I can see what data has changed (if any).

        :return:
        """
        df_rpd = pd.merge(self.df_ipo,
                          self.df_rpd,
                          how='left',
                          on='formatted company name',
                          suffixes=('', '_old'))
        df_rpd = df_rpd.loc[(df_rpd['RPD Number'].notna())
                            & (df_rpd['RPD Status'] != 'Resolved')]
        # only make one update per RPD using the latest information
        df_rpd.sort_values(by=['Last Checked'], ascending=False, inplace=True)
        df_rpd.drop_duplicates(subset='RPD Number',
                               inplace=True,
                               ignore_index=True)
        # compare the data
        df_rpd['IPO Date Comparison'] = df_rpd['IPO Date'] == df_rpd[
            'IPO Date_old']
        df_rpd['Market Comparison'] = df_rpd['Market'] == df_rpd['Market_old']
        df_rpd['CUSIP Comparison'] = df_rpd['CUSIP'] == df_rpd['CUSIP_old']
        df_rpd.loc[df_rpd['CUSIP'].isna(), 'CUSIP Comparison'] = True
        df_rpd['Symbol Comparison'] = df_rpd['Symbol'] == df_rpd['Symbol_old']
        df_rpd.loc[df_rpd['Symbol'].isna(), 'Symbol Comparison'] = True
        # filter for only updated data (i.e. where comparison is False)
        df_rpd = df_rpd.loc[(~df_rpd['IPO Date Comparison'])
                            | (~df_rpd['CUSIP Comparison'])
                            | (~df_rpd['Symbol Comparison'])
                            # | (~df_rpd['Market Comparison'])
                            ]
        # update the main data frame with updated data to prevent from making the same comment multiple times
        # note this will only update when IPO date, CUSIP or Symbol have changed. Other changes won't be added.
        df_rpd = df_rpd[[
            'iconum', 'CUSIP', 'Company Name', 'Symbol', 'Market', 'IPO Date',
            'Price', 'Price Range', 'Status', 'Notes', 'Last Checked',
            'IPO Deal ID', 'formatted company name', 'RPD Number', 'RPD Link',
            'RPD Creation Date', 'RPD Status'
        ]]
        self.df = pd.concat([self.df, df_rpd],
                            ignore_index=True).drop_duplicates(subset=[
                                'RPD Number', 'formatted company name'
                            ],
                                                               keep='last')
        logger.info(
            f"{len(df_rpd)} updates to make on existing RPDs: {', '.join([str(int(num)) for num in df_rpd['RPD Number'].to_list()])}"
        )
        df_rpd['IPO Date'] = df_rpd['IPO Date'].dt.strftime('%Y-%m-%d')
        df_rpd.replace(np.nan, '', inplace=True)
        for idx, row in df_rpd.iterrows():
            rpd = int(row['RPD Number'])
            rpd_status = self.get_rpd_status(rpd)
            # update the main data frame with the status
            self.df.loc[self.df['RPD Number'] == rpd,
                        'RPD Status'] = rpd_status
            if rpd_status == 'Resolved':
                rpd_resolution = self.get_rpd_resolution(rpd)
                dupe_rpd = rpd_resolution.get('DuplicateRPD')
                if dupe_rpd:
                    # if RPD is resolved and not a duplicate dupe_rpd will be None
                    # if this RPD was resolved as a duplicate RPD, update the main data frame with the new RPD number
                    # not updating comments of the dupe RPD since that should already be done in the row for that RPD
                    # capture the matching rows before RPD Number is overwritten below
                    rpd_mask = self.df['RPD Number'] == rpd
                    self.df.loc[
                        rpd_mask,
                        'RPD Link'] = 'https://is.factset.com/rpd/summary.aspx?messageId=' + str(
                            dupe_rpd)
                    self.df.loc[rpd_mask, 'RPD Status'] = ''
                    self.df.loc[rpd_mask, 'RPD Number'] = dupe_rpd
            else:
                # only adding comments to RPDs that have not been resolved (will still add comments to completed RPDs)
                fds_cusip = str(row['CUSIP'])
                ipo_date = str(row['IPO Date'])
                ticker = str(row['Symbol'])
                exchange = str(row['Market'])
                ipo_html = row[self.rpd_cols].to_frame().to_html(
                    header=False, na_rep='', justify='left')
                comment_endpoint = self.base_url + f'rpd/{int(rpd)}/comments'
                rpd_comment = {'Content': ipo_html}
                res_c = self.session.post(comment_endpoint,
                                          data=json.dumps(rpd_comment),
                                          headers=self.headers)

                question_endpoint = self.base_url + f'rpd/{int(rpd)}/questions'
                questions = [{
                    'Id': 31407,
                    'Answers': [{
                        'AnswerValue': fds_cusip
                    }]
                }, {
                    'Id': 31405,
                    'Answers': [{
                        'AnswerValue': ipo_date
                    }]
                }, {
                    'Id': 31406,
                    'Answers': [{
                        'AnswerValue': exchange
                    }]
                }, {
                    'Id': 31408,
                    'Answers': [{
                        'AnswerValue': ticker
                    }]
                }]
                res_q = self.session.post(question_endpoint,
                                          data=json.dumps(questions),
                                          headers=self.headers)
Example #17
from logging_ipo_dates import logger, consolidate_webscraping_results
import source_reference
import website_scraping
import data_transformation
import entity_mapping
import data_comparison
import file_management
import rpd_creation

logger.info('-' * 100)
source_reference.main()
website_scraping.main()
data_transformation.main()
entity_mapping.main()
df_summary = data_comparison.main()
# Note: email_report is being called separately with another batch file because the run schedule is different
rpd_creation.main()
consolidate_webscraping_results()
file_management.main()
logger.info('-' * 100)