Code example #1
from xlwt import Workbook

# PreSeriesUtils (used below) is the project's own utility class.


def write_to_file(file_name, companies, summary_columns, wb_sheet):
    """
    This method generates a new Excel file with the name <file_name> that will
    contains the data found in PreSeries for each entry in the original excel
    file


    :param file_name: the name of the file to be generated
    :param companies: a list of companies with the basic data found in PreSeries
    :param summary_columns: the columns in the original file that will be
        used in the new file to give more information about the companies.
    :param wb_sheet: the excel sheet of the original file where we will find
        the summary fields of the companies.
    """
    workbook = Workbook()
    companies_sheet = workbook.add_sheet('Companies')

    # Build the header names (order must match the columns written below)
    header = ["Original Row", "Company Name", "Country", "Domain"]
    if summary_columns:
        header.extend(summary_columns)
    for index, value in enumerate(header):
        companies_sheet.write(0, index, value)

    for index, company_data in enumerate(companies):
        companies_sheet.write(1 + index, 0, company_data["row"])
        companies_sheet.write(
            1 + index, 1,
            company_data["name"].decode('utf-8', 'ignore')
            if "name" in company_data else "")
        companies_sheet.write(
            1 + index, 2, company_data.get("country_code", ""))
        companies_sheet.write(
            1 + index, 3, company_data.get("domain", ""))
        for index2, summary_column in enumerate(summary_columns or []):
            # Read the summary cell, trying cp1252 first and falling back
            # to utf-8 when the value cannot be encoded as cp1252.
            try:
                columnvalue = wb_sheet.cell_value(
                    company_data["row"],
                    PreSeriesUtils.excel2num(summary_column)).encode('cp1252')
            except UnicodeEncodeError:
                columnvalue = wb_sheet.cell_value(
                    company_data["row"],
                    PreSeriesUtils.excel2num(summary_column)).encode('utf-8')

            companies_sheet.write(1 + index, 4 + index2,
                                  columnvalue.decode('utf-8', 'ignore'))
    workbook.save(file_name)
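
Both examples rely on PreSeriesUtils.excel2num to turn a column letter (e.g. "B") into the 0-based column index that xlrd/xlwt expect. That helper is not part of this listing; a minimal sketch of what it presumably does (an assumption, not the project's actual code) is:

def excel2num(column_letters):
    """Convert an Excel column label ("A", "Z", "AA", ...) to a 0-based index."""
    index = 0
    for char in column_letters.upper():
        index = index * 26 + (ord(char) - ord('A') + 1)
    return index - 1

# excel2num("A") == 0, excel2num("Z") == 25, excel2num("AA") == 26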
Code example #2
File: searcher.py  Project: fabdrnd/api_examples
    # Requires module-level: import logging, urllib; from xlrd import open_workbook
    # (plus the project's PreSeriesUtils helper)
    def read_search_data_from_excel(self,
                                    file_name,
                                    column_id=None,
                                    column_name=None,
                                    column_country=None,
                                    column_domain=None,
                                    skip_rows=0):
        """
        This method is responsible for extract from an Excel file all the
        companies we will need to find in PreSeries.

        for build the query parameters that
        we are going to use to look for the companies in PreSeries informed
        in an Excel file.

        The query string will have only the id criteria or the name of the
         company if the id is not informed. The domain and country_code won't
         be used in the query, we will use them later for select the best
         match from all the candidates that matched the query.

        :return: a list where each row is one company which contains a tuple
            with two items, the query string to look in preseries for the
            company and the map with all the parameters used in the query
        """

        logging.debug("Looking for the first sheet in the Excel.")
        wb = open_workbook(file_name)
        first_sheet = wb.sheets()[0]
        logging.debug("Sheet name [%s]." % first_sheet.name)

        self.companies_query = []
        for row in range(skip_rows, first_sheet.nrows):

            logging.debug("Processing row: %d" % row)

            if column_id:
                company_id = first_sheet.cell_value(
                    row, PreSeriesUtils.excel2num(column_id))
                self.companies_query.append(("id=%s" % company_id, {
                    "row": row,
                    "id": company_id
                }))
                continue

            query_string = {}
            query_params = {"row": row}

            if column_name and \
                    first_sheet.cell_value(
                        row, PreSeriesUtils.excel2num(column_name)):

                # Read the name, trying cp1252 first and falling back to
                # utf-8 when the value cannot be encoded as cp1252.
                try:
                    company_name = first_sheet.cell_value(
                        row,
                        PreSeriesUtils.excel2num(column_name)).encode('cp1252')
                except UnicodeEncodeError:
                    company_name = first_sheet.cell_value(
                        row,
                        PreSeriesUtils.excel2num(column_name)).encode('utf-8')

                query_string['name__icontains'] = company_name
                query_params["name"] = company_name

            if column_domain:
                company_domain = PreSeriesUtils.resolve_domain(
                    first_sheet.cell_value(
                        row, PreSeriesUtils.excel2num(column_domain)))

                if company_domain:
                    # We only use the domain after the search to select the
                    # best candidate
                    query_params["domain"] = company_domain

            if column_country and \
                    first_sheet.cell_value(
                        row, PreSeriesUtils.excel2num(column_country)):
                country_code = PreSeriesUtils.resolve_country(
                    first_sheet.cell_value(
                        row, PreSeriesUtils.excel2num(column_country)))

                if country_code:
                    # We only use the country_code after the search to
                    # select the best candidate
                    query_params['country_code'] = country_code

            self.companies_query.append(
                (urllib.urlencode(query_string), query_params))
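
For reference, each entry appended to self.companies_query is a (query_string, query_params) tuple. A small Python 2 illustration with hypothetical values (the row, name, domain and country below are made up, not taken from the project):

import urllib

# Hypothetical values for one spreadsheet row
query_string = {'name__icontains': 'Acme Corp'}
query_params = {'row': 5, 'name': 'Acme Corp',
                'domain': 'acme.com', 'country_code': 'US'}

# This is the kind of tuple stored in self.companies_query for that row
entry = (urllib.urlencode(query_string), query_params)
print entry[0]   # name__icontains=Acme+Corp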