예제 #1
0
 def post(self):
     """Build a Company from the request parameters, store it, and redirect.

     Reads the "name", "industry" and "url" parameters through
     ``self.GetParam``, assigns each to the Company attribute of the same
     name, calls ``put()`` on the record, and then redirects to
     "/companies/view/" followed by the record's ``urlname``.
     """
     record = Company()
     # Each request parameter maps onto the attribute with the same name.
     for field in ("name", "industry", "url"):
         setattr(record, field, self.GetParam(field))
     record.put()
     destination = "/companies/view/" + record.urlname
     self.redirect(destination)
def process_company_file(data_file):
    """Import industries and companies from an uploaded Excel workbook.

    Opens the workbook stored under ``settings.MEDIA_ROOT`` with xlrd,
    records the number of sheets on ``data_file`` and walks every sheet.
    Row 0 of each sheet is skipped; for every other row, columns 0-3 are read
    as company name, industry name, ISIN and BSE code.

    For each row the industry is fetched or created by name. The company is
    looked up by ISIN first, then by company name, and a new one is created
    when neither lookup yields exactly one match. ``created_by`` is set only
    on records created by this import, so re-processing a file does not
    reassign the creator of records that already exist.

    Args:
        data_file: upload record. ``uploaded_file.name`` and ``uploaded_by``
            are read; ``number_of_sheets`` and ``processing_completed`` are
            written, each followed by ``save()``.

    Raises:
        Whatever ``int()`` raises when a non-empty BSE code cell is not
        numeric, and any database error other than a missing or ambiguous
        company match (those two trigger the fallback lookups).
    """
    workbook = xlrd.open_workbook(settings.MEDIA_ROOT+'/'+data_file.uploaded_file.name)
    sheet_names = workbook.sheet_names()
    data_file.number_of_sheets = len(sheet_names)
    data_file.save()

    uploader = data_file.uploaded_by
    # Only "no unique match" may fall through to the next lookup strategy;
    # anything else (e.g. a database failure) must surface instead of being
    # mistaken for a missing company.
    no_unique_match = (Company.DoesNotExist, Company.MultipleObjectsReturned)

    for sheet_name in sheet_names:
        worksheet = workbook.sheet_by_name(sheet_name)
        # Start at 1: row 0 is never imported (presumably the header row).
        for row in range(1, worksheet.nrows):
            company_name = worksheet.cell_value(row, 0)
            industry_name = worksheet.cell_value(row, 1)
            isin = worksheet.cell_value(row, 2)
            bse_code = worksheet.cell_value(row, 3)

            # ``defaults`` applies only when the row is inserted, so an
            # existing industry keeps its original creator.
            industry, _created = Industry.objects.get_or_create(
                industry_name=industry_name,
                defaults={'created_by': uploader}
            )

            try:
                company = Company.objects.get(isin_code=isin)
            except no_unique_match:
                try:
                    company = Company.objects.get(company_name=company_name)
                except no_unique_match:
                    company = Company()
                    company.created_by = uploader

            company.isin_code = isin
            if bse_code:
                # int() also accepts the float that xlrd yields for numeric cells.
                company.BSE_code = int(bse_code)
            company.industry = industry
            company.company_name = company_name
            company.save()

    data_file.processing_completed = True
    data_file.save()
예제 #3
0
def get_snp_500_companies(url):
    """Build a list of Company objects from the first HTML table at ``url``.

    The table is loaded with ``pandas.read_html``. Each row produces one
    Company constructed from the 'Symbol' column, with ``name``, ``sector``,
    ``industry`` and ``date_added`` copied from the 'Security',
    'GICS Sector', 'GICS Sub Industry' and 'Date first added' columns.

    Args:
        url: location passed straight to ``pandas.read_html``.

    Returns:
        A list of Company objects in table row order.
    """
    # (Company attribute, source column) pairs, in assignment order.
    column_for = (
        ('name', 'Security'),
        ('sector', 'GICS Sector'),
        ('industry', 'GICS Sub Industry'),
        ('date_added', 'Date first added'),
    )
    constituents = pd.read_html(url)[0]
    result = []
    for _, record in constituents.iterrows():
        entry = Company(record['Symbol'])
        for attribute, column in column_for:
            setattr(entry, attribute, record[column])
        result.append(entry)
    return result
예제 #4
0
    def save_company(self, tree):
        """Fill a new Company from a parsed page tree and save it.

        Three groups of fields are extracted with XPath queries on ``tree``:
        the introduction, the name, and the industry / nature / scale
        triple. Fields whose source nodes are missing are simply left unset.
        ``company.save()`` is always called at the end.
        """
        company = Company()

        # Introduction: every <p> with text overwrites the previous one, so
        # the last such paragraph under a "jobs_txt" div wins.
        for paragraph in tree.xpath("//div[contains(@class, 'jobs_txt')]/p"):
            if paragraph.text is not None:
                company.introduction = paragraph.text

        # Name: only the first 'sr_bt' cell matters, and only the first
        # sibling that follows its parent row.
        title_cells = tree.xpath("//td[@class='sr_bt']")
        if title_cells:
            title_row = title_cells[0].getparent()
            print("company name:")
            first_sibling = next(title_row.itersiblings(), None)
            if first_sibling is not None:
                # NOTE(review): the leading "//" makes this query search the
                # whole document rather than just first_sibling -- confirm
                # that is intended.
                for anchor in first_sibling.xpath('//td/table/tr/td/a'):
                    if anchor.text is not None:
                        company.name = anchor.text

        # Industry / nature / scale: taken from the text chunks of the cell
        # that holds the first remaining <strong>, skipping the label texts.
        strong_tags = tree.xpath('//td/strong')
        txt1_tag = ['公司行业:', '公司性质:', '公司规模:']
        remove_from_list(strong_tags, txt1_tag)
        if strong_tags:
            targets = ('industry', 'nature', 'scale')
            position = 0
            for text in strong_tags[0].getparent().itertext():
                text = text.lstrip()
                if text in txt1_tag:
                    continue
                # Chunks beyond the third are counted but not stored.
                if position < len(targets):
                    setattr(company, targets[position], text)
                position += 1

        company.save()
예제 #5
0
def getCompanyInfo(dom):
    """Collect the information of one company from a parsed page.

    Args:
        dom: callable query object for the page; it is called with CSS
            selectors and its results are wrapped with ``pq``.

    Returns:
        A new Company populated from the page. ``email`` and ``phone_num``
        are set to None and ``etag`` to ''. The object is not saved here.
    """
    header = dom('.companyInfoItems')

    # Label -> value pairs from the info table; the label is the text of the
    # first cell up to the first colon.
    fields = {}
    for row in header('.companyInfoTab tr'):
        cells = pq(row)
        label = cells('td:eq(0)').text().split(u':')[0]
        fields[label] = cells('td:eq(1)').text()

    # Company size is expected as "<low>-<high>"; anything else gives no bounds.
    low_high = (None, None)
    raw_scale = fields.get(u'公司规模')
    if raw_scale:
        matched = re.search(r'(\d+)-(\d+)', raw_scale)
        if matched:
            low_high = matched.groups()

    # Job-contact entries of the exact form "label:value" are merged into
    # the same mapping.
    for node in dom('.jobContact>div>div').find('div'):
        parts = pq(node).text().split(u':')
        if len(parts) == 2:
            fields[parts[0]] = parts[1]

    com = Company()
    com.name = header('.companyTitle').text()
    com.industry = fields.get(u'公司行业')
    com.type = fields.get(u'公司类型')
    com.address = fields.get(u'公司地址')
    com.website = fields.get(u'公司主页')
    low, high = low_high
    com.scale_low = low
    com.scale_high = high
    com.email = None
    com.phone_num = None
    com.description = dom('.black12 tr:eq(2)').find('td').html()
    com.etag = ''

    return com
예제 #6
0
def getCompanyInfo(dom):
    """Get the information of one company.

    Reads the company info table and the job-contact entries from ``dom``
    into a label -> value mapping, then copies selected values onto a new
    Company.

    Args:
        dom: callable query object for the page, called with CSS selectors.

    Returns:
        The populated Company (not saved by this function). ``scale_low``
        and ``scale_high`` are the two digit groups of a "<low>-<high>"
        company-size value, or None when the value is absent or malformed.
    """
    summary = dom('.companyInfoItems')
    table_rows = summary('.companyInfoTab tr')

    details = {}
    for raw_row in table_rows:
        row = pq(raw_row)
        # Keep only the part of the first cell before the first colon.
        caption = row('td:eq(0)').text().split(u':')[0]
        content = row('td:eq(1)').text()
        details[caption] = content

    scale_text = details.get(u'公司规模')
    span = re.search(r'(\d+)-(\d+)', scale_text) if scale_text else None
    scale_bounds = span.groups() if span else (None, None)

    # Job contact: only entries that split into exactly two parts on ':'
    # are stored.
    contact_nodes = dom('.jobContact>div>div').find('div')
    for raw_node in contact_nodes:
        pieces = pq(raw_node).text().split(u':')
        if len(pieces) != 2:
            continue
        caption, content = pieces
        details[caption] = content

    com = Company()
    com.name = summary('.companyTitle').text()
    com.industry = details.get(u'公司行业')
    com.type = details.get(u'公司类型')
    com.address = details.get(u'公司地址')
    com.website = details.get(u'公司主页')
    com.scale_low, com.scale_high = scale_bounds
    com.email = None
    com.phone_num = None
    com.description = dom('.black12 tr:eq(2)').find('td').html()
    com.etag = ''

    return com