def post(self):
    """Create a Company from the request parameters and redirect to its page.

    Reads the "name", "industry" and "url" parameters through
    ``self.GetParam``, copies each onto a new ``Company``, calls ``put()``
    on it, and then redirects to ``/companies/view/<urlname>``.
    """
    record = Company()
    # Each request parameter maps onto the attribute of the same name.
    for field in ("name", "industry", "url"):
        setattr(record, field, self.GetParam(field))
    record.put()
    self.redirect("/companies/view/" + record.urlname)
def _find_existing_company(isin, company_name):
    """Return the stored Company matching a spreadsheet row, or None.

    The ISIN is tried first, but only when the cell is non-empty; the
    company name is the fallback key.
    """
    lookups = []
    if isin:
        # A blank ISIN cell must not match (and later overwrite) some
        # unrelated company whose isin_code is also blank.
        lookups.append({'isin_code': isin})
    lookups.append({'company_name': company_name})

    for lookup in lookups:
        try:
            return Company.objects.get(**lookup)
        except (Company.DoesNotExist, Company.MultipleObjectsReturned):
            # No unique match on this key: try the next key.  Any other
            # error (database failure, interrupt, ...) now propagates
            # instead of being silently swallowed.
            continue
    return None


def process_company_file(data_file):
    """Import industries and companies from an uploaded Excel workbook.

    The workbook is opened from ``settings.MEDIA_ROOT`` using the name of
    ``data_file.uploaded_file``.  Every sheet is read; the first row of each
    sheet is skipped and the remaining rows are read as
    (company name, industry name, ISIN, BSE code) from columns 0-3.

    For each row the industry is fetched or created, and the company is
    looked up by ISIN, then by name, and created if neither lookup finds a
    unique match.  The company's fields are then overwritten from the row.

    Side effects: saves ``data_file`` twice (sheet count up front,
    ``processing_completed = True`` at the end) and saves one Industry and
    one Company per data row.

    Raises:
        ValueError: if a non-empty BSE code cell cannot be converted by
            ``int()``.
    """
    workbook = xlrd.open_workbook(settings.MEDIA_ROOT+'/'+data_file.uploaded_file.name)
    worksheets = workbook.sheet_names()
    data_file.number_of_sheets = len(worksheets)
    data_file.save()

    for worksheet_name in worksheets:
        worksheet = workbook.sheet_by_name(worksheet_name)
        # Row 0 is skipped; data rows are 1 .. nrows - 1.
        for row in range(1, worksheet.nrows):
            company_name = worksheet.cell_value(row, 0)
            industry_name = worksheet.cell_value(row, 1)
            isin = worksheet.cell_value(row, 2)
            bse_code = worksheet.cell_value(row, 3)

            industry, _created = Industry.objects.get_or_create(industry_name=industry_name)
            # NOTE(review): created_by is overwritten even when the industry
            # already existed -- confirm that this is intended.
            industry.created_by = data_file.uploaded_by
            industry.save()

            company = _find_existing_company(isin, company_name)
            if company is None:
                company = Company()

            company.isin_code = isin
            if bse_code:
                company.BSE_code = int(bse_code)
            company.industry = industry
            company.company_name = company_name
            # NOTE(review): same overwrite-on-update behaviour as for the
            # industry above -- confirm that this is intended.
            company.created_by = data_file.uploaded_by
            company.save()

    data_file.processing_completed = True
    data_file.save()
def get_snp_500_companies(url):
    """Build a list of Company objects from the first HTML table at *url*.

    The table is parsed with ``pd.read_html``.  Each row yields one
    ``Company`` constructed from the 'Symbol' column, with ``name``,
    ``sector``, ``industry`` and ``date_added`` taken from the 'Security',
    'GICS Sector', 'GICS Sub Industry' and 'Date first added' columns.

    Returns:
        A list of Company objects in table row order.
    """
    def _company_from_row(record):
        # Translate one table row into a populated Company.
        entry = Company(record['Symbol'])
        entry.name = record['Security']
        entry.sector = record['GICS Sector']
        entry.industry = record['GICS Sub Industry']
        entry.date_added = record['Date first added']
        return entry

    constituents = pd.read_html(url)[0]
    return [_company_from_row(record) for _, record in constituents.iterrows()]
def save_company(self, tree): company = Company() #company introduction l = tree.xpath('//div[contains(@class, \'jobs_txt\')]/p') #print "company introduction:" for i in l: if not i.text is None: #print i.text company.introduction = i.text #company name l = tree.xpath('//td[@class=\'sr_bt\']') for i in l: tr = l[0].getparent() print "company name:" iters = tr.itersiblings(); for it in iters: a_list = it.xpath('//td/table/tr/td/a') for i in a_list: if not i.text is None: #print i.text company.name = i.text break break #company info l = tree.xpath('//td/strong') txt1_tag = ['公司行业:', '公司性质:', '公司规模:' ] remove_from_list(l, txt1_tag) for i in l: td = i.getparent() iters = td.itertext() index = 0 for it in iters: it = it.lstrip() if it in txt1_tag: continue if index == 0: company.industry = it elif index == 1: company.nature = it elif index == 2: company.scale = it index += 1 #print "[%s]" % it.lstrip() break company.save()
def getCompanyInfo(dom):
    '''Collect the information of one company into a Company object.

    ``dom`` is called with CSS selectors to query the page.  Label/value
    pairs are read from the rows of ``.companyInfoTab`` inside
    ``.companyInfoItems``, then from the ``.jobContact`` entries whose text
    splits into exactly two parts on a full-width colon.

    The company scale text (label meaning "company scale") is searched for
    a "<digits>-<digits>" range; the two captured strings become
    ``scale_low`` and ``scale_high``, or both are None when there is no
    scale text or no range in it.

    Returns:
        A Company with name, industry, type, address, website, scale_low,
        scale_high and description filled from the page, email and
        phone_num set to None, and etag set to ''.
    '''
    info_block = dom('.companyInfoItems')

    # Info table: the label is the text before the first full-width colon
    # in the first cell, the value is the text of the second cell.
    fields = {}
    for row_el in info_block('.companyInfoTab tr'):
        row = pq(row_el)
        label = row('td:eq(0)').text().split(u':')[0]
        fields[label] = row('td:eq(1)').text()

    # Scale range is resolved before the contact entries are merged in.
    size_range = (None, None)
    size_text = fields.get(u'公司规模')
    if size_text:
        found = re.search(r'(\d+)-(\d+)', size_text)
        if found:
            size_range = found.groups()

    # Job contact entries: keep only those of the form "label:value".
    for contact_el in dom('.jobContact>div>div').find('div'):
        parts = pq(contact_el).text().split(u':')
        if len(parts) != 2:
            continue
        label, value = parts
        fields[label] = value

    com = Company()
    com.name = info_block('.companyTitle').text()
    com.industry = fields.get(u'公司行业')
    com.type = fields.get(u'公司类型')
    com.address = fields.get(u'公司地址')
    com.website = fields.get(u'公司主页')
    com.scale_low, com.scale_high = size_range
    com.email = None
    com.phone_num = None
    com.description = dom('.black12 tr:eq(2)').find('td').html()
    com.etag = ''
    return com