Ejemplo n.º 1
0
def get_rank(api, tree, rank_type, key_code='', val_code='Rank'):
    """Extract rank data of kind *rank_type* from an AWIS response tree.

    Args:
        api: Object exposing an ``NS_PREFIXES`` mapping with an ``'awis'``
            entry (the XML namespace used to qualify *rank_type*).
        tree: Parsed response offering ``find(path)``. May be ``None``
            (e.g. when the AWIS request failed); that is treated as
            "not found" instead of raising.
        rank_type: Local name of the element to look up. ``'Rank'`` yields
            the element text; any other name yields a dict built from the
            element's children.
        key_code: Attribute name read from every child element and used as
            the dict key (ignored for ``'Rank'``).
        val_code: Substring a grandchild tag has to contain for its text to
            be stored as the dict value. Defaults to ``'Rank'``.

    Returns:
        For ``'Rank'``: the element text, or ``None`` when it is missing
        or empty. For other rank types: a dict mapping key attribute to
        value text (empty when nothing was found).
    """
    # Fallback values returned when the element is missing.
    ret = None if rank_type == 'Rank' else {}

    rank_root = None
    if tree is not None:
        rank_search = '//{%s}%s' % (api.NS_PREFIXES['awis'], rank_type)
        rank_root = tree.find(rank_search)
    if rank_root is None:
        log.info('\t{} info not found...'.format(rank_type))
        return ret

    try:
        if rank_type == 'Rank':
            ret = rank_root.text
            if not ret:
                log.info("\tRank not specified.")
        else:
            for child in rank_root:
                key = child.attrib.get(key_code)
                for nep in child:
                    # Only keep non-empty values from the requested tag.
                    if val_code in nep.tag and nep.text:
                        ret[key] = nep.text
    except Exception:
        # Best effort: a malformed response must not abort the whole run.
        log.info('\t{} info not found...'.format(rank_type))
        log.debug(traceback.format_exc())
    return ret
Ejemplo n.º 2
0
def get_awis_tree(awis, website):
    """Request AWIS data for *website* and return the response tree.

    Calls ``awis.url_info`` for the ``Rank``, ``RankByCountry`` and
    ``RankByCity`` response groups, then checks the ``StatusCode`` element
    (looked up in the ``awis.NS_PREFIXES["alexa"]`` namespace).

    Args:
        awis: API client exposing ``url_info`` and ``NS_PREFIXES``.
        website: Site to query; a falsy value skips the request.

    Returns:
        The response tree when the status is ``"Success"``; ``None`` when
        no website was given, the status could not be read, or the request
        was unsuccessful (the response is then dumped to stdout).
    """
    if not website:
        log.info("No website specified. Website is {}.".format(website))
        return None

    log.info("Getting AWIS data for {}...".format(website))
    tree = awis.url_info(website, 'Rank', 'RankByCountry', 'RankByCity')
    status_search = "//{%s}StatusCode" % awis.NS_PREFIXES["alexa"]
    status = tree.find(status_search)
    if status is None:
        log.info("Failed to read AWIS return status...")
        return None

    if status.text == "Success":
        return tree

    log.info('ERROR:AWIS request unsuccessful for website: \n\t{}'
             .format(website))
    try:
        # Dump the raw response to help diagnose the failure.
        tree.write(sys.stdout)
    except Exception:
        # Failing to print diagnostics must not mask the real error.
        log.debug(traceback.format_exc())
    return None
Ejemplo n.º 3
0
def get_cb_raw_details(cb, company_name):
    """Fetch CB details for *company_name*, retrying with other spellings.

    The name is queried as given first. While ``check_details`` rejects
    the current result, two respellings are tried in order:

    1. spaces and ``.com`` removed;
    2. lower-cased, with spaces and dots turned into hyphens.

    A respelling identical to the original name is not queried again.

    Returns:
        The last result fetched if ``check_details`` accepts it,
        otherwise ``None``.
    """
    log.info("Looking up '{}' in CB...".format(company_name))
    result = cb.company(company_name)

    # Respelling rules, applied lazily and only while the result is rejected.
    respellings = (
        lambda raw: raw.replace(' ', '').replace('.com', ''),
        lambda raw: raw.lower().replace(' ', '-').replace('.', '-'),
    )
    for respell in respellings:
        if check_details(result):
            continue
        candidate = respell(company_name)
        if candidate == company_name:
            continue
        log.info("\t {}...".format(candidate))
        result = cb.company(candidate)

    return result if check_details(result) else None
Ejemplo n.º 4
0
def _find_cb_details(cb, company_name):
    """Return ``(display_name, details)`` for a company, searching CB if needed.

    When the direct lookup fails, CB search results are scanned for the
    first name that contains, or is contained in, *company_name*. On such a
    match the display name becomes ``"?<queried>(<found>)"`` so the output
    shows that a different company name was used.
    """
    details = get_cb_raw_details(cb, company_name)
    if details:
        return company_name, details

    log.info('Searching CB for company {}...'.format(company_name))
    search_details = cb.search(company_name)
    if search_details['total'] == 0:
        return company_name, None

    for result in search_details['results']:
        try:
            found_name = result['name']
            if company_name in found_name or found_name in company_name:
                details = get_cb_raw_details(cb, found_name)
                company_name = "?{}({})".format(company_name, found_name)
                break
        except Exception as err:
            # Skip malformed results / failed lookups and try the next one.
            log.debug("Skipping CB search result: {}".format(err))
    if not details:
        log.info("\t\tNOT FOUND")
    return company_name, details


def _rank_or_none(awis, tree, rank_type, key_code=''):
    """Call ``get_rank`` unless there is no AWIS tree to read from."""
    if tree is None:
        return None
    return get_rank(awis, tree, rank_type, key_code)


def _funding_summary(details):
    """Return ``(raised_amount, investors)`` strings from the funding rounds.

    Either value is ``NA`` when nothing could be collected.
    """
    total = 0
    investor_names = set()
    fund_rounds = get_info(details, 'funding_rounds')
    try:
        for funds in fund_rounds:
            amount = get_info(funds, 'raised_amount')
            if amount:
                total += amount
            for investor in funds['investments']:
                if investor['financial_org']:
                    investor_names.add(investor['financial_org']['name'])
    except Exception as err:
        # Best effort: keep whatever was gathered before the bad record.
        log.debug("Incomplete funding data: {}".format(err))

    raised_amount = NA if total == 0 else '$' + str(total)
    investors = ','.join(investor_names) if investor_names else NA
    return raised_amount, investors


def _office_location(details):
    """Return ``(city, state)`` of the first listed office, or ``NA`` for both."""
    offices = get_info(details, 'offices')
    try:
        first_office = offices[0]
        return (get_info(first_office, 'city'),
                get_info(first_office, 'state_code'))
    except Exception as err:
        log.debug("No office data: {}".format(err))
        return NA, NA


def _current_ceo(details):
    """Return the name of the current CEO from the relationships, or ``NA``.

    If several current relationships have 'CEO' in the title, the last
    one listed wins.
    """
    ceo = NA
    people = get_info(details, 'relationships')
    try:
        for relation in people:
            if not relation['is_past'] and 'CEO' in relation['title']:
                person = get_info(relation, 'person')
                if person:
                    ceo = "{} {}".format(person['first_name'],
                                         person['last_name'])
    except Exception as err:
        log.debug("Incomplete relationship data: {}".format(err))
    return ceo


def get_company_details(cb, awis, company_name):
    """Build the output row (a list of values) for one company.

    The name is stripped of surrounding whitespace and punctuation, looked
    up in CB (with a search fallback), and the company's homepage is then
    queried through AWIS for rank data.

    Args:
        cb: CB API client (``company`` / ``search``).
        awis: AWIS API client, passed to ``get_awis_tree`` / ``get_rank``.
        company_name: Raw company name as read from the input.

    Returns:
        ``[company_name]`` alone when the company cannot be found;
        otherwise the full list of column values, each passed through
        ``unicode_to_str``. Rank-by-country and rank-by-city columns are
        appended when ``args.extra_info`` is set.
    """
    details_list = []

    # get company name and strip spaces and punctuation
    company_name = company_name.strip().strip(string.punctuation)
    log.info('')
    log.info("{}".format(company_name.upper()))

    # get company details from CB
    company_name, details = _find_cb_details(cb, company_name)
    if not details:
        return [company_name]

    # get website details from AWIS; tree is None when the request failed
    website = get_info(details, 'homepage_url')
    tree = get_awis_tree(awis, website)
    # get prev 3 month rank
    prev3 = _rank_or_none(awis, tree, 'Rank')

    # Build CSV line list
    # 'Name' - rewritten in case a different company name was found
    lappend(details_list, company_name)
    # 'Website'
    lappend(details_list, website)
    # 'Status'
    acq = get_info(details, 'acquisition')
    ipo = get_info(details, 'ipo')
    lappend(details_list, 'Public' if (ipo or acq) else 'Private')
    # 'PIC($m)' and 'Existing Investors'
    raised_amount, investors = _funding_summary(details)
    lappend(details_list, raised_amount)
    lappend(details_list, investors)
    # '# of Employees'
    lappend(details_list, get_info(details, 'number_of_employees'))
    # 'Prev 3 mos'
    lappend(details_list, prev3)
    # 'Description'
    description = get_info(details, 'overview')
    lappend(details_list, strip_formatting(description))
    # 'City' and 'State'
    city, state = _office_location(details)
    lappend(details_list, city)
    lappend(details_list, state)
    # 'CEO Name'
    lappend(details_list, _current_ceo(details))
    # 'Contact Number'
    lappend(details_list, get_info(details, 'phone_number'))
    # 'Email Address'
    lappend(details_list, get_info(details, 'email_address'))

    # Add extra info if required
    # order here should be the same as in the main function section
    if args.extra_info:
        # 'Rank By Country'
        lappend(details_list,
                _rank_or_none(awis, tree, 'RankByCountry', 'Code'))
        # 'Rank By City'
        lappend(details_list,
                _rank_or_none(awis, tree, 'RankByCity', 'Name'))

    return [unicode_to_str(detail) for detail in details_list]
Ejemplo n.º 5
0
            headers = [x for x in h if x]
            headers.remove('Alexa Traffic Global Rank')
            if args.extra_info:
                headers.append('Rank By Country')
                headers.append('Rank By City')
            writer.writerow(headers)
            for line in reader:
                if not_empty(line):
                    company_name = line[0].strip()
                    new_line = get_company_details(cb, awis, company_name)
                    writer.writerow(new_line)
    
# Script entry point: parse options, load API keys, then run main().
if __name__ == '__main__':
    parse_cli_opts()
    if not get_keys(args.key_file):
        log.info("ERROR: Failed reading API keys from file {}.\n"
                 "Check the readme and make sure the file exists and \n"
                 "has the appropriate format.".format(args.key_file))
        # SystemExit needs no import and does not rely on the `site`
        # module's interactive `exit` helper being installed.
        raise SystemExit(1)
    try:
        main()
    except IOError as e:
        # str(e) instead of e.message: the latter is deprecated since
        # Python 2.6, missing on Python 3, and empty for IOErrors built
        # from (errno, strerror).
        if 'response code is 403' in str(e):
            log.debug(traceback.format_exc())
            log.info("!!! -> Make sure the AWIS keys are correctly read from key file.\n")
        else:
            log.info(e)