def parse_satisfaction(soup):
    """Extract the employee-satisfaction summary from a parsed page.

    The first ``<div id="EmployerRatings">`` found under *soup* is
    searched for two values.

    Returns a dict with two keys:

    * ``'ratings'`` -- result of ``intify`` on the rating-count text
      (``0`` when unavailable).
    * ``'score'`` -- the score text converted with ``float`` (``None``
      when unavailable).

    When the page has no ``EmployerRatings`` div, the defaults are
    returned as-is.
    """
    result = {'ratings': 0, 'score': None}

    def _ratings(section):
        """Number of times this company has been rated by employees"""
        heading = section.findAll('h3')[0]
        count = heading.findAll('span', {'class': 'notranslate'})[0]
        return intify(count.text.strip())

    def _score(section):
        """Read the score from the rating value bar span as a float."""
        bar_selector = {'class': 'gdRatingValueBar gdrHighmed'}
        return float(section.findAll('span', bar_selector)[0].text)

    sections = soup.findAll('div', {'id': 'EmployerRatings'})
    if not sections:
        return result

    section = sections[0]
    # NOTE(review): tryelse is defined elsewhere; presumably it returns
    # `default` when the callable raises -- confirm against its definition.
    for key, extract in (('ratings', _ratings), ('score', _score)):
        result[key] = tryelse(partial(extract, section), default=result[key])
    return result
def parse_ceo(soup):
    """Extract CEO details from a parsed page.

    The first ``<div class="ceoRating cf">`` found under *soup* is
    searched for the individual fields.

    Returns a dict with the keys ``'name'``, ``'reviews'``, ``'avatar'``
    and ``'%approval'``.  Their fallbacks are ``''``, ``0``, ``''`` and
    ``None`` respectively; these are also the values returned when the
    page has no CEO rating div.
    """
    result = {
        'reviews': 0,
        '%approval': None,
        'avatar': '',
        'name': '',
    }

    def _name(block):
        """Text of the CEO name heading."""
        headings = block.findAll('h4', {'class': 'ceoName notranslate'})
        return headings[0].text

    def _reviews(block):
        """CEO rating count, passed through ``intify``."""
        outer = block.findAll('span', {'class': 'numCEORatings minor'})[0]
        inner = outer.findAll('tt', {'class': 'notranslate'})[0]
        return intify(inner.text)

    def _avatar(block):
        """``src`` attribute of the first image in the head-shot div."""
        holder = block.findAll('div', {'id': 'CEOHeadShot'})[0]
        return holder.findAll('img')[0]['src']

    def _approval(block):
        """Approval figure, passed through ``intify``."""
        outer = block.findAll('span', {'class': 'approvalPercent'})[0]
        inner = outer.findAll('tt', {'class': 'notranslate'})[0]
        return intify(inner.text)

    blocks = soup.findAll('div', {'class': 'ceoRating cf'})
    if not blocks:
        return result

    block = blocks[0]
    # (key, extractor, fallback) -- evaluated in this order.
    # NOTE(review): tryelse is defined elsewhere; presumably it returns
    # `default` when the callable raises -- confirm against its definition.
    plan = (
        ('name', _name, ''),
        ('reviews', _reviews, 0),
        ('avatar', _avatar, ''),
        ('%approval', _approval, None),
    )
    for key, extract, fallback in plan:
        result[key] = tryelse(partial(extract, block), default=fallback)
    return result
def parse_meta(soup):
    """Extract general company metadata from a parsed page.

    Returns a dict containing ``'website'``, ``'name'``, ``'location'``,
    ``'logo'``, ``'connections'``, ``'reviews'``, ``'score'`` and
    ``'size'``, plus one entry per label/value pair found in the
    ``InfoDetails`` div.

    Every fixed key is overwritten by its extractor call below, so the
    fallbacks that can actually be returned are the ones in the plan
    table (e.g. ``0`` for ``'connections'`` and ``None`` for ``'logo'``),
    not the initial values of the dict.

    NOTE(review): another ``parse_meta`` definition appears later in this
    source; if both live in the same module the later one replaces this.
    """
    result = {
        'website': '',
        'name': '',
        'location': '',
        'logo': '',
        'connections': None,
        'reviews': None,
        'score': None,
    }

    def _reviews(page):
        """Review count, passed through ``intify``."""
        outer = page.findAll('span', {'class': 'numReviews subtle'})[0]
        inner = outer.findAll('span', {'class': 'txtShadowWhite'})[0]
        return intify(inner.text)

    def _score(page):
        """Overall score as a float, read from the ``<strong>`` tag."""
        score_selector = {'class': 'h2 tightVert tightHorz notranslate'}
        holder = page.findAll('span', score_selector)[0]
        return float(holder.findAll('strong')[0].text)

    def _details(page):
        """Map each label in the ``InfoDetails`` div to its span text.

        Not wrapped in ``tryelse``: a row lacking a ``<label>`` or a
        ``<span>`` raises ``IndexError`` out of ``parse_meta``.
        """
        found = {}
        containers = page.findAll('div', {'id': 'InfoDetails'})
        if not containers:
            return found
        for row in containers[0].findAll('div', {'class': 'empInfo cf'}):
            label = str(row.findAll('label')[0].text)
            value = str(row.findAll('span')[0].text)
            found[label] = value
        return found

    def _connections(page):
        """Inside-connections figure, passed through ``intify``."""
        holders = page.findAll('div', {'id': 'OverviewInsideConnections'})
        figures = holders[0].findAll('tt', {'class': 'notranslate'})
        return intify(figures[0].text)

    def _logo(page):
        """``src`` attribute of the image inside the logo span."""
        holder = page.findAll('span', {'class': 'sqLogo tighten medSqLogo'})[0]
        return holder.findAll('img')[0]['src']

    def _website(page):
        """Text of the website span."""
        site_selector = {'class': 'website notranslate txtShadowWhite'}
        return page.findAll('span', site_selector)[0].text

    def _name(page):
        """Text of the first ``<tt class="i-emp">``."""
        return page.findAll('tt', {'class': 'i-emp'})[0].text

    def _location(page):
        """Text of the first ``<span class="value i-loc">``."""
        return page.findAll('span', {'class': 'value i-loc'})[0].text

    def _size(page):
        """List of ``intify`` results, one per ``<tt>`` in the size div."""
        holder = page.findAll('div', {'class': 'moreData margTop5 subtle'})[0]
        figures = holder.findAll('tt', {'class': 'notranslate'})
        return [intify(figure.text) for figure in figures]

    # (key, extractor, fallback) -- evaluated in this order.
    # NOTE(review): tryelse is defined elsewhere; presumably it returns
    # `default` when the callable raises -- confirm against its definition.
    plan = (
        ('connections', _connections, 0),
        ('website', _website, ''),
        ('name', _name, ''),
        ('location', _location, ''),
        ('size', _size, [None, None]),
        ('reviews', _reviews, None),
        ('logo', _logo, None),
        ('score', _score, None),
    )
    for key, extract, fallback in plan:
        result[key] = tryelse(partial(extract, soup), default=fallback)

    # Detail labels are merged last, so they win on any key collision.
    result.update(_details(soup))
    return result
def parse_meta(soup):
    """Extract general company metadata from a parsed page.

    Returns a dict containing ``'website'``, ``'name'``, ``'location'``,
    ``'logo'``, ``'connections'``, ``'reviews'``, ``'score'`` and
    ``'size'``, plus one entry per label/value pair found in the
    ``InfoDetails`` div (values are UTF-8 encoded before ``str``).

    Every fixed key is overwritten by its extractor call below, so the
    fallbacks that can actually be returned are the ones in the plan
    table (e.g. ``0`` for ``'connections'`` and ``None`` for ``'logo'``),
    not the initial values of the dict.

    NOTE(review): an earlier ``parse_meta`` definition appears in this
    source; if both live in the same module this later one is the one
    that stays bound to the name.
    """
    result = {
        'website': '',
        'name': '',
        'location': '',
        'logo': '',
        'connections': None,
        'reviews': None,
        'score': None,
    }

    def _reviews(page):
        """Review count, passed through ``intify``."""
        outer = page.findAll('span', {'class': 'numReviews subtle'})[0]
        inner = outer.findAll('span', {'class': 'txtShadowWhite'})[0]
        return intify(inner.text)

    def _score(page):
        """Overall score as a float, read from the ``<strong>`` tag."""
        score_selector = {'class': 'h2 tightVert tightHorz notranslate'}
        holder = page.findAll('span', score_selector)[0]
        return float(holder.findAll('strong')[0].text)

    def _details(page):
        """Map each label in the ``InfoDetails`` div to its span text.

        Not wrapped in ``tryelse``: a row lacking a ``<label>`` or a
        ``<span>`` raises ``IndexError`` out of ``parse_meta``.
        """
        found = {}
        containers = page.findAll('div', {'id': 'InfoDetails'})
        if not containers:
            return found
        for row in containers[0].findAll('div', {'class': 'empInfo cf'}):
            label = str(row.findAll('label')[0].text)
            # NOTE(review): on Python 3, str() of the encoded bytes gives
            # the "b'...'" repr -- confirm the target interpreter.
            value = str(row.findAll('span')[0].text.encode('utf-8'))
            found[label] = value
        return found

    def _connections(page):
        """Inside-connections figure, passed through ``intify``."""
        holders = page.findAll('div', {'id': 'OverviewInsideConnections'})
        figures = holders[0].findAll('tt', {'class': 'notranslate'})
        return intify(figures[0].text)

    def _logo(page):
        """``src`` attribute of the image inside the logo span."""
        holder = page.findAll('span', {'class': 'sqLogo tighten medSqLogo'})[0]
        return holder.findAll('img')[0]['src']

    def _website(page):
        """Text of the website span."""
        site_selector = {'class': 'website notranslate txtShadowWhite'}
        return page.findAll('span', site_selector)[0].text

    def _name(page):
        """Text of the first ``<tt class="i-emp">``."""
        return page.findAll('tt', {'class': 'i-emp'})[0].text

    def _location(page):
        """Text of the first ``<span class="value i-loc">``."""
        return page.findAll('span', {'class': 'value i-loc'})[0].text

    def _size(page):
        """List of ``intify`` results, one per ``<tt>`` in the size div."""
        holder = page.findAll('div', {'class': 'moreData margTop5 subtle'})[0]
        figures = holder.findAll('tt', {'class': 'notranslate'})
        return [intify(figure.text) for figure in figures]

    # (key, extractor, fallback) -- evaluated in this order.
    # NOTE(review): tryelse is defined elsewhere; presumably it returns
    # `default` when the callable raises -- confirm against its definition.
    plan = (
        ('connections', _connections, 0),
        ('website', _website, ''),
        ('name', _name, ''),
        ('location', _location, ''),
        ('size', _size, [None, None]),
        ('reviews', _reviews, None),
        ('logo', _logo, None),
        ('score', _score, None),
    )
    for key, extract, fallback in plan:
        result[key] = tryelse(partial(extract, soup), default=fallback)

    # Detail labels are merged last, so they win on any key collision.
    result.update(_details(soup))
    return result