def assessment(self):
    """assessment(self) -> <zip obj.>

    Crawl the assessment section found under
    '#assesmentInformationContainer'.

    If the page shows an assessed year, that year is embedded in the
    segment prefix; otherwise the plain 'Assessment' prefix is used. The
    cell texts are gathered here and handed to tool_funcs.crawl_table()
    as already-prepared table data.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    assessed_year = self._pq_doc(
        "#assesmentInformationContainer "
        "[data-bind='if: showAssessedYear()'] span").text()
    prefix = ('Assessment_{0} Assessment'.format(assessed_year)
              if assessed_year else 'Assessment')

    # For each '.column', keep the text of every '.row' cell except the
    # first one; the first column as a whole is then dropped too.
    columns = [
        [cell.text() for cell in column(".row").items()][1:]
        for column in self._pq_doc(
            '#assesmentInformationContainer .column').items()
    ][1:]

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        headers_css='#assesmentInformationContainer .subheader',
        prepared_table_data=columns,
        cell_css='.row',
        seg_prefixes=prefix,
        rearrange_table_method='numbering_headers',
        data_start=1)
    return zip(headers, data)
def space(self):
    """space(self) -> <zip obj.>

    Crawl the table inside the element tagged with
    data-viewmodelname='propertySpaces', using the 'Space' segment prefix
    and the 'numbering_headers' rearrange method.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    header_cells_css = "[data-viewmodelname='propertySpaces'] thead th"
    body_rows_css = "[data-viewmodelname='propertySpaces'] tbody tr"

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        body_rows_css,
        'td',
        'Space',
        'numbering_headers')
    return zip(headers, data)
def unit_mix(self):
    """unit_mix(self) -> <zip obj.>

    Crawl '#UnitMixTable' with the 'Unit Mix' segment prefix and the
    'numbering_headers' rearrange method.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    header_cells_css = '#UnitMixTable thead th'
    body_rows_css = '#UnitMixTable tbody tr'

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        body_rows_css,
        'td',
        'Unit Mix',
        'numbering_headers')
    return zip(headers, data)
def leasing_activity(self):
    """leasing_activity(self) -> <zip obj.>

    Crawl '#LeasingActivityTable' with the 'Leasing Activity' segment
    prefix and the 'numbering_headers' rearrange method.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    header_cells_css = "#LeasingActivityTable thead th"
    body_rows_css = "#LeasingActivityTable tbody tr"

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        body_rows_css,
        'td',
        'Leasing Activity',
        'numbering_headers')
    return zip(headers, data)
def tenants(self):
    """tenants(self) -> <zip obj.>

    Crawl '#TenantsTable' with the 'Tenants' segment prefix and the
    'numbering_headers' rearrange method. A replace_string is passed to
    tool_funcs.crawl_table() alongside the selectors.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    header_cells_css = '#TenantsTable thead th'
    body_rows_css = '#TenantsTable tbody tr'

    # NOTE(review): "•" looks like a mis-decoded bullet character
    # followed by a newline -- confirm against the crawled page text
    # before touching this literal.
    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        body_rows_css,
        'td',
        'Tenants',
        'numbering_headers',
        replace_string="•\n")
    return zip(headers, data)
def demographics(self):
    """demographics(self) -> <zip obj.>

    Crawl the demographics segment. Headers and the first set of rows
    come from '#DemogrpahicsTable'; the rows of
    '#DemogrpahicsTrendTable' are supplied as an additional row
    selector. The 'cross_headers' rearrange method is used and
    ignore_display_items is switched off.

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    # NOTE(review): the 'Demogrpahics' spelling presumably mirrors the
    # ids used by the page markup -- do not "correct" it without checking.
    header_cells_css = '#DemogrpahicsTable thead th'
    main_rows_css = '#DemogrpahicsTable tbody tr'
    trend_rows_css = '#DemogrpahicsTrendTable tbody tr'

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        main_rows_css,
        'td',
        'Demographics',
        'cross_headers',
        ignore_display_items=False,
        additional_table_rows_css=[trend_rows_css])
    return zip(headers, data)
def public_transportation(self):
    """public_transportation(self) -> <zip obj. of (list, list)>

    This part is crawled as up to three sub-tables, selected by the
    'hasSubways', 'hasCommuterRail' and 'hasAirports' data-bind
    attributes (in that order). Each sub-table has its own headers css
    and rows css; every pair is passed to tool_funcs.crawl_table() and
    the results are joined into one headers list and one data list.

    :return: <zip obj. of the joined headers and data>
    """
    # One (headers css, rows css) pair per sub-table.
    selector_pairs = [
        (".public-transportation-layout "
         "[data-bind='visible: hasSubways'] .head .column",
         ".public-transportation-layout "
         "[data-bind='visible: hasSubways'] "
         "[data-bind='foreach: data.Items'] .row"),
        (".public-transportation-layout "
         "[data-bind='visible: hasCommuterRail'] .head .column",
         ".public-transportation-layout "
         "[data-bind='visible: hasCommuterRail'] "
         "[data-bind='foreach: data.Items'] .row"),
        (".public-transportation-layout "
         "[data-bind='visible: hasAirports'] .head .column",
         ".public-transportation-layout "
         "[data-bind='visible: hasAirports'] "
         "[data-bind='foreach: data.Items'] .row"),
    ]

    all_headers = []
    all_data = []
    for header_css, rows_css in selector_pairs:
        sub_headers, sub_data = tool_funcs.crawl_table(
            self._pq_doc,
            header_css,
            rows_css,
            '.column',
            'Public Transportation',
            'same_headers')
        all_headers.extend(sub_headers)
        all_data.extend(sub_data)
    return zip(all_headers, all_data)
def traffic(self):
    """traffic(self) -> <zip obj.>

    Crawl '#TrafficTable': its 'thead th' cells are used as the headers
    css and its 'tbody tr' rows as the rows css, with the 'Traffic'
    segment prefix and the 'numbering_headers' rearrange method (see
    tool_funcs.py for how that method arranges the table).

    :return: <zip obj. pairing the headers and the data returned by
        tool_funcs.crawl_table()>
    """
    header_cells_css = '#TrafficTable thead th'
    body_rows_css = '#TrafficTable tbody tr'

    headers, data = tool_funcs.crawl_table(
        self._pq_doc,
        header_cells_css,
        body_rows_css,
        'td',
        'Traffic',
        'numbering_headers')
    return zip(headers, data)
def market_conditions(self):
    """market_conditions(self) -> <zip obj. of (list, list)>

    The Market Conditions part is made of several sub-tables, one per
    '<h4>' segment heading. Every sub-table except the last one gets the
    upper headers (usually 'Current' and 'YOY Change'); the last one
    gets the bottom headers (usually 'Current' and 'Prev Year') and its
    first data row is skipped. Headers and data are collected here and
    passed to tool_funcs.crawl_table() as prepared values.

    Local names:
        :segments: The segment names found in the 'Market Conditions'
            part.
        :up_headers: Column headers of all sub-tables but the last.
        :bot_headers: Column headers of the last sub-table.
        :data_css_by_segment: Maps a segment name to the css that
            locates the data to crawl for that segment.

    A segment name missing from data_css_by_segment raises KeyError.

    :return: <zip obj. of the joined 1-D headers and data>
    """
    def collect_texts(css):
        # Text of every element matched by ``css``, in document order.
        return [node.text() for node in self._pq_doc(css).items()]

    segments = collect_texts('.property-marketConditions h4')
    up_headers = collect_texts('.property-marketConditions '
                               '.section-header.column')
    bot_headers = collect_texts('.property-marketConditions '
                                '.headerLabel')

    data_css_by_segment = {
        'Submarket Leasing Activity':
            '[data-bind="visible: hasTwelveMonthActivity"]',
        'Asking Rents Per SF':
            '[data-bind="visible: hasAskingRent"]',
        'Asking Rents Per Unit':
            '[data-bind="visible: hasAskingRent"]',
        'Gross Asking Rents Per SF':
            '[data-bind="visible: hasAskingRent"]',
        'NNN Asking Rents Per SF':
            '[data-bind="visible: hasAskingRent"]',
        'Same Store Asking Rent Per Unit':
            '[data-bind="visible: hasAskingRent"]',
        'Same Store Asking Rent Per SF':
            '[data-bind="visible: hasAskingRent"]',
        'Concessions':
            '[data-bind="visible: hasConcessions"]',
        'Under Construction Units':
            '[data-bind="visible: hasConstructionUnits"]',
        'Submarket Sales Activity':
            '[data-bind="visible: hasSalesActivity"]',
        'Vacancy Rates':
            '[data-bind="visible: hasVacancyRate"]',
    }

    all_headers = []
    all_data = []
    for name in segments:
        rows_css = data_css_by_segment[name] + " .table-row"
        rows = [
            row.text().split('\n')
            for row in self._pq_doc(rows_css).items()
        ]

        if name == segments[-1]:
            # The last sub-table has a different structure: it uses the
            # bottom headers and its first row is not data.
            column_headers = bot_headers
            rows = rows[1:]
        else:
            column_headers = up_headers

        # Headers and data of this single sub-table.
        sub_headers, sub_data = tool_funcs.crawl_table(
            self._pq_doc,
            seg_prefixes='Market Conditions',
            prepared_table_headers=[name] + column_headers,
            prepared_table_data=rows,
            rearrange_table_method='cross_headers')

        # Join every sub-table's 1-D headers and data into the results.
        all_headers.extend(sub_headers)
        all_data.extend(sub_data)
    return zip(all_headers, all_data)