예제 #1
0
	def get_all_rics_data(self):

		writer = pd.ExcelWriter('00 BAP FY18-1-2-3 Numbers.xlsx')
		for ric in self.rics:
			print(ric.upper())
			self.Q1CompanyData_sheet = self.read_file_source(self.path_quarter_one, ric, WorkSheet.bap_company_old.value)
			self.Q2CompanyData_sheet = self.read_file_source(self.path_quarter_two, ric, WorkSheet.bap_company_old.value)
			self.Q3CompanyData_sheet = self.read_file_source(self.path_quarter_three, ric, WorkSheet.bap_company.value)

			data_source = Common.set_datasource(ric)
			batch1 = db.pandas_read(self.batch.format(self.year, self.Q1, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
			batch2 = db.pandas_read(self.batch.format(self.year, self.Q2, data_source, SourceSystemType.RICCD_bap.value))['BatchID']
			batch3 = db.pandas_read(self.batch.format(self.year, self.Q3, data_source, SourceSystemType.RICCD_bap.value))['BatchID']

			self.Q1CompanyData = db.pandas_read(self.selectQ1.format(str(batch1[0]) + ' ORDER BY CompanyName'))
			self.Q2CompanyData = db.pandas_read(self.select.format('Config.CompanyDataRaw', str(batch2[0]) + ' ORDER BY CompanyName'))
			self.Q3CompanyData = db.pandas_read(self.select.format('BAP.QuarterlyCompanyData', str(batch3[0]) + ' ORDER BY [Company Name]'))

			self.Q1CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch1[0]))
			self.Q2CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch2[0]))
			self.Q3CompanyData_fact_ric = db.pandas_read(self.select.format('Reporting.FactRICCompanyData', batch3[0]))

			self.Q1CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 1, data_source))
			self.Q2CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 2, data_source))
			self.Q3CompanyData_rollup = db.pandas_read(SQL.sql_rollup_select.value.format(self.year, 3, data_source))

			df_ric = self.bap_summary()
			if df_ric is not None:
				df_ric.to_excel(writer, ric.upper(), index=False)
			df_ric = None
		writer.save()
예제 #2
0
    def combine_bap_missing_source_file(self, current_path=''):
        clms = [
            'CompanyName', 'Website', 'AnnualRevenue', 'NumberOfEmployees',
            'FundingToDate', 'DataSource', 'Fiscal_Quarter', 'FiscalYear'
        ]
        if current_path != '':
            current_path = os.path.join(os.path.expanduser("~"), current_path)
            os.chdir(current_path)
            self.source_file = os.listdir(current_path)

        file_list = self.get_source_file()
        q_company = []
        i = 0

        for fl in file_list:
            try:
                i += 1
                ds = COM.set_datasource(str(fl))
                if ds is not None:
                    com = pd.read_excel(fl, WS.bap_company.value)
                    print('{}.[{}] {} -->{}'.format(i, ds, fl,
                                                    len(com.columns)))
                    # print(com.head())
                    com.insert(5, 'DataSource', ds)
                    com.columns = clms
                    q_company.append(com)
                else:
                    print('\tMissing - {}'.format(fl))
            except Exception as ex:
                print(ex)
        print('\n\n')
        bap_company = pd.concat(q_company)

        return bap_company
예제 #3
0
    def read_source_file(self,
                         ftype,
                         data_source,
                         combine_for: Combine,
                         file_name='',
                         current_path=''):
        com_a = None
        if current_path != '':
            current_path = os.path.join(os.path.expanduser("~"), current_path)
            os.chdir(current_path)
            self.source_file = os.listdir(current_path)

        file_list = self.get_source_file()

        if data_source == DS.BAP:
            if ftype == FT.SPREAD_SHEET.value:
                l_company, l_company_annual, l_program, l_program_youth = self.bap_dataframes(
                )
                i = 0
                for fl in file_list:
                    try:
                        i += 1
                        ds = COM.set_datasource(str(fl))
                        print('{}. {}'.format(i, fl))
                        prg = pd.read_excel(fl, WS.bap_program.value)
                        prg.drop(prg.columns[[13]], inplace=True, axis=1)
                        prg.columns = self.program_columns

                        prg_youth = pd.read_excel(fl,
                                                  WS.bap_program_youth.value)
                        prg_youth.drop(prg_youth.columns[2],
                                       inplace=True,
                                       axis=1)
                        prg_youth.columns = self.program_youth_columns
                        com = pd.read_excel(fl, WS.bap_company.value)
                        com.columns = self.quarterly_company_columns
                        if ds in [DST.COMMUNI_TECH.value, DST.HAL_TECH.value
                                  ] and self.quarter - 1 == 3:
                            com_a = pd.read_excel(fl,
                                                  WS.bap_company_annual.value)
                            com_a.columns = self.annual_company_columns
                            l_company_annual.append(com_a)
                        if combine_for == Combine.FOR_ETL:
                            FileService.data_system_source(
                                prg, prg_youth, com, com_a, os.getcwd(),
                                str(fl), ds)

                        l_program.append(prg)
                        l_program_youth.append(prg_youth)
                        l_company.append(com)

                    except Exception as ex:
                        print(ex)
                bap_program = pd.concat(l_program)
                bap_program_youth = pd.concat(l_program_youth)
                bap_company = pd.concat(l_company)
                if self.quarter - 1 == 4:  #combine_for == Combine.FOR_ETL or combine_for == Combine.FOR_QA:
                    bap_company_annual = pd.concat(l_company_annual)
                    return bap_program, bap_program_youth, bap_company, bap_company_annual
                else:
                    return bap_program, bap_program_youth, bap_company, None

        elif data_source == DS.CBINSIGHT:
            if ftype == FT.CSV:
                if file_name != '':
                    return pd.read_csv(file_name)
                else:
                    for fl in file_list:
                        self.data_list.append(pd.read_csv(fl))
                    return self.data_list

        elif data_source == DS.OSVP:
            print('')
            print('')

        elif data_source == DS.OTHER:
            if ftype == FT.SPREAD_SHEET:
                target_list = []
                self.target_list_dataframe()
                j = 0
                for f in file_list:
                    j += 1
                    print('{}. {}'.format(j, f))
                    tl = pd.read_excel(f, WS.target_list.value)

                    tl.insert(5, 'Venture_basic_name', None)
                    datasource = COM.set_datasource(f)
                    tl.insert(0, 'DataSource', datasource)
                    tl.insert(0, 'Worksheet', str(WS.target_list.value))
                    tl.insert(0, 'FileName', str(f))
                    tl.insert(0, 'Path', self.path)
                    tl.insert(0, 'CompanyID', None)
                    tl.insert(0, 'BatchID', 0)
                    tl['OSVP_Status'] = None
                    tl['CII'] = None
                    tl['Year'] = self.year
                    tl['Status'] = 1
                    # tl.columns = self.tl_columns
                    # tl['Date_founded'] = tl['Date_founded'][:10]
                    # tl['Date_of_incorporation'] = tl['Date_of_incorporation'][:10]
                    target_list.append(tl)
                    print('{} - {}'.format(len(tl.columns), tl.columns))

                df_tl = pd.concat(target_list)
                return df_tl

        elif data_source == DS.IAF:
            if ftype == FT.SPREAD_SHEET:
                iaf_list = []
                k = 0
                for ia in file_list:
                    k += 1
                    print('{}. {}'.format(k, ia))
                    iaf = pd.read_excel(ia, sheet_name=None)
                    for i in range(len(list(iaf.items()))):
                        if i == 0:
                            df_summary = list(iaf.items())[i][1]
                        elif i > 0:
                            df = list(iaf.items())[i][1][:-8].T[1:]
                            # print(df[1][0])
                            # print(list(iaf.items())[1][1]['General Info - Firm / Entrepreneur'])
                            iaf_list.append(df)
                    df_iaf = pd.concat(iaf_list)
                    df_iaf = df_iaf.where(pd.notnull(df_iaf), None)
                    df_summary = df_summary.where(pd.notnull(df_summary), None)
                return df_iaf, df_summary