Example #1
    def _check_extension_sheets(self, ext):
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse('test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)
        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])

        os.remove(path)
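The test above uses an older pandas API, notably the cols= keyword (now columns=) and an explicit writer.save() call, which newer pandas replaces with a context manager. Below is a minimal sketch of the same multi-sheet round trip with the current interface; it assumes a recent pandas with the openpyxl engine installed, and the file name and frame contents are illustrative only.

import numpy as np
import pandas as pd

frame = pd.DataFrame({'A': [1.0, np.nan, 3.0], 'B': [4.5, 5.5, 6.5]})
tsframe = pd.DataFrame({'value': [1.5, 2.5, 3.5]},
                       index=pd.date_range('2024-01-01', periods=3))

path = 'multi_sheet_example.xlsx'
# the context manager saves and closes the file on exit (replacing writer.save())
with pd.ExcelWriter(path) as writer:
    frame.to_excel(writer, sheet_name='test1', columns=['A', 'B'])
    tsframe.to_excel(writer, sheet_name='test2')

# read one sheet back and check the round trip, mirroring the assertions above
recons = pd.read_excel(path, sheet_name='test1', index_col=0)
pd.testing.assert_frame_equal(frame, recons)
assert pd.ExcelFile(path).sheet_names == ['test1', 'test2']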
Example #2
    def _check_extension_sheets(self, ext):
        path = "__tmp_to_excel_from_excel_sheets__." + ext

        self.frame["A"][:5] = nan

        self.frame.to_excel(path, "test1")
        self.frame.to_excel(path, "test1", cols=["A", "B"])
        self.frame.to_excel(path, "test1", header=False)
        self.frame.to_excel(path, "test1", index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, "test1")
        self.tsframe.to_excel(writer, "test2")
        writer.save()
        reader = ExcelFile(path)
        recons = reader.parse("test1", index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse("test2", index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)
        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal("test1", reader.sheet_names[0])
        np.testing.assert_equal("test2", reader.sheet_names[1])

        os.remove(path)
Example #3
    def _check_extension_sheets(self, ext):
        path = '__tmp_to_excel_from_excel_sheets__.' + ext

        self.frame['A'][:5] = nan

        self.frame.to_excel(path, 'test1')
        self.frame.to_excel(path, 'test1', cols=['A', 'B'])
        self.frame.to_excel(path, 'test1', header=False)
        self.frame.to_excel(path, 'test1', index=False)

        # Test writing to separate sheets
        writer = ExcelWriter(path)
        self.frame.to_excel(writer, 'test1')
        self.tsframe.to_excel(writer, 'test2')
        writer.save()
        reader = ExcelFile(path)
        recons = reader.parse('test1', index_col=0)
        tm.assert_frame_equal(self.frame, recons)
        recons = reader.parse('test2', index_col=0)
        tm.assert_frame_equal(self.tsframe, recons)
        np.testing.assert_equal(2, len(reader.sheet_names))
        np.testing.assert_equal('test1', reader.sheet_names[0])
        np.testing.assert_equal('test2', reader.sheet_names[1])

        os.remove(path)
Example #4
    def export_to(self, file_path, batchsize=100):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query, start=start,
                limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats,
                # so skip sections that have 0 records
                if len(records) > 0:
                    columns = section["columns"] + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer, section_name,
                            header=header, index=False)
            header = False
            # increment counter(s)
            start += batchsize
        self.xls_writer.save()
Example #5
    def export_to(self, file_path, batchsize=1000):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query, start=start,
                limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats,
                # so skip sections that have 0 records
                if len(records) > 0:
                    # use a different group delimiter if needed
                    columns = section["columns"]
                    if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                        columns = [self.group_delimiter.join(col.split("/")) for col in columns ]
                    columns = columns + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer, section_name,
                            header=header, index=False)
            header = False
            # increment counter(s)
            start += batchsize
            time.sleep(0.1)
        self.xls_writer.save()
Example #6
    def export_to(self, file_path):
        self.xls_writer = ExcelWriter(file_path)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object

        # get records from mongo - do this on export so we can batch if we
        # choose to, as we should
        cursor = self._query_mongo(self.filter_query)

        data = self._format_for_dataframe(cursor)

        # TODO: batching will not work as expected since indexes are calculated
        # based on the current batch; a new batch will re-calculate indexes,
        # and if the batches go into the same excel file we'll have duplicates.
        # Possible solution - keep track of the last index from each section

        # write all cursor's data to different sheets
        # TODO: for every repeat, the index should be re-calculated
        for section in self.sections:
            # TODO: currently ignoring nested repeat data which will have no
            # records
            records = data[section["name"]]
            if len(records) > 0:
                section_name = section["name"]
                columns = section["columns"] + self.EXTRA_COLUMNS
                writer = XLSDataFrameWriter(records, columns)
                writer.write_to_excel(self.xls_writer, section_name,
                        header=True, index=False)
        self.xls_writer.save()
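The TODO above suggests the fix for the duplicate-index problem: keep a running index per section so later batches continue where earlier ones stopped. Here is a small, hypothetical sketch of that bookkeeping; the section names and record layout are assumptions, not this project's actual API.

# hypothetical bookkeeping: one running counter per section, carried across
# batches, so _index values never restart at 1 when a new batch is written
last_index = {'survey': 0, 'household_repeat': 0}  # assumed section names

def index_batch(section_name, records):
    # assign consecutive _index values, continuing from earlier batches
    start = last_index[section_name]
    for offset, record in enumerate(records, 1):
        record['_index'] = start + offset
    last_index[section_name] = start + len(records)
    return records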
Example #7
    def to_excel(self, path, na_rep=''):
        """
        Write each DataFrame in Panel to a separate excel sheet

        Parameters
        ----------
        path : string
            File path
        na_rep : string, default ''
            Missing data representation
        """
        from pandas.io.parsers import ExcelWriter
        writer = ExcelWriter(path)
        for item, df in self.iteritems():
            name = str(item)
            df.to_excel(writer, name, na_rep=na_rep)
        writer.save()
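Panel was removed in pandas 1.0, but the same one-sheet-per-item pattern works with a plain dict of DataFrames. A minimal sketch follows; the item names and data are illustrative only.

import pandas as pd

# illustrative data standing in for the Panel items
frames = {
    'prices': pd.DataFrame({'AAPL': [1.5, 2.5], 'MSFT': [3.5, 4.5]}),
    'volumes': pd.DataFrame({'AAPL': [10, 20], 'MSFT': [30, 40]}),
}

with pd.ExcelWriter('panel_like.xlsx') as writer:
    for item, df in frames.items():
        # one sheet per item, mirroring Panel.to_excel above
        df.to_excel(writer, sheet_name=str(item), na_rep='')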
Example #8
    def to_excel(self, path, na_rep=''):
        """
        Write each DataFrame in Panel to a separate excel sheet

        Parameters
        ----------
        path : string
            File path
        na_rep : string, default ''
            Missing data representation
        """
        from pandas.io.parsers import ExcelWriter
        writer = ExcelWriter(path)
        for item, df in self.iteritems():
            name = str(item)
            df.to_excel(writer, name, na_rep=na_rep)
        writer.save()
Example #9
    def export_to(self, file_path, batchsize=1000):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query,
                                       start=start,
                                       limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats
                # so ignore sections that have 0 records
                if len(records) > 0:
                    # use a different group delimiter if needed
                    columns = section["columns"]
                    if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                        columns = [
                            self.group_delimiter.join(col.split("/"))
                            for col in columns
                        ]
                    columns = columns + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer,
                                          section_name,
                                          header=header,
                                          index=False)
            header = False
            # increment counter(s)
            start += batchsize
            time.sleep(0.1)
        self.xls_writer.save()
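A self-contained sketch of the batching idea these export_to variants implement: write each batch below the previous one on the same sheet, emitting the header only for the first batch. It assumes a recent pandas with the openpyxl engine; fetch_batch() is a hypothetical stand-in for the Mongo query.

import pandas as pd

def fetch_batch(start, limit):
    # hypothetical data source standing in for _query_mongo(start=..., limit=...)
    return [{'name': 'row%d' % i, 'value': i}
            for i in range(start, min(start + limit, 25))]

batchsize = 10
start = 0
next_row = 0
with pd.ExcelWriter('batched_export.xlsx') as writer:
    while True:
        records = fetch_batch(start, batchsize)
        if not records:
            break
        df = pd.DataFrame.from_records(records)
        # header only for the first batch, then append below what was written
        df.to_excel(writer, sheet_name='survey', index=False,
                    header=(next_row == 0), startrow=next_row)
        next_row += len(df) + (1 if next_row == 0 else 0)
        start += batchsize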
Example #10
# big = big.drop('AnnStaticRet', 1)
# big = big.drop('AnnCapitalRet', 1)
# big['AnnStaticRet'] = new_ind.AnnStaticRet.values
# big['AnnCapitalRet'] = new_ind.AnnCapitalRet.values

today_str = str(str(month) + str(day) + str(year))

big = big.rename(columns={'Last': 'OptionPrice', 'industry': 'Industry'})

xlsx = '.xlsx'
csv = '.csv'
file_name = 'All_covered_call' + today_str

sectors = big.Sector.unique().astype(str)

name_xl = file_name + xlsx
writer = ExcelWriter(name_xl)
big.to_excel(writer, sheet_name='All Sectors')
summary = big.groupby(['Sector', 'Industry']).mean()
summary.to_excel(writer, sheet_name='Sector Summary')

for i in sectors:
    to_save = big[big.Sector == i]
    name = i.replace('/', '-')
    to_save.to_excel(writer, sheet_name=name)

writer.save()

name_cs = file_name + csv
big.to_csv(name_cs)
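Excel limits worksheet names to 31 characters and rejects the characters [ ] : * ? / \, which is why the loop above swaps '/' for '-' and why the builders below run every name through get_valid_sheet_name. Below is a small, hypothetical helper along those lines; it is not the project's actual implementation.

INVALID_SHEET_CHARS = set('[]:*?/\\')
SHEET_NAME_MAX_CHARS = 31  # Excel's hard limit on sheet name length

def valid_sheet_name(name, existing):
    # replace invalid characters, truncate, then de-duplicate against `existing`
    clean = ''.join('-' if c in INVALID_SHEET_CHARS else c for c in name)
    clean = clean[:SHEET_NAME_MAX_CHARS]
    candidate, suffix = clean, 1
    while candidate in existing:
        tail = str(suffix)
        candidate = clean[:SHEET_NAME_MAX_CHARS - len(tail)] + tail
        suffix += 1
    return candidate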
Example #11
class XLSDataFrameBuilder(AbstractDataFrameBuilder):
    """
    Generate structures from mongo and DataDictionary for an XLSDataFrameWriter

    This builder can choose to query the data in batches and write to a single
    ExcelWriter object using multiple instances of XLSDataFrameWriter
    """
    INDEX_COLUMN = u"_index"
    PARENT_TABLE_NAME_COLUMN = u"_parent_table_name"
    PARENT_INDEX_COLUMN = u"_parent_index"
    EXTRA_COLUMNS = [INDEX_COLUMN, PARENT_TABLE_NAME_COLUMN,
        PARENT_INDEX_COLUMN]
    SHEET_NAME_MAX_CHARS = 30
    XLS_SHEET_COUNT_LIMIT = 255
    XLS_COLUMN_COUNT_MAX = 255
    CURRENT_INDEX_META = 'current_index'

    def __init__(self, username, id_string, filter_query=None):
        super(XLSDataFrameBuilder, self).__init__(username, id_string,
            filter_query)

    def _setup(self):
        super(XLSDataFrameBuilder, self)._setup()
        # need to split columns, with repeats in individual sheets and
        # everything else on the default sheet
        self._generate_sections()

    def export_to(self, file_path, batchsize=100):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query, start=start,
                limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats,
                # so skip sections that have 0 records
                if len(records) > 0:
                    columns = section["columns"] + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer, section_name,
                            header=header, index=False)
            header = False
            # increment counter(s)
            start += batchsize
        self.xls_writer.save()

    def _format_for_dataframe(self, cursor):
        """
        Format each record for consumption by a dataframe

        Returns a dictionary keyed by sheet name, where each value is a list
        of dicts to feed into a DataFrame
        """
        data = dict((section_name, []) for section_name in self.sections.keys())

        default_section = self.sections[self.survey_name]
        default_columns = default_section["columns"]

        for record in cursor:
            # from record, we'll end up with multiple records, one for each
            # section we have

            # add records for the default section
            self._add_data_for_section(data[self.survey_name],
                record, default_columns, self.survey_name)
            parent_index = default_section[self.CURRENT_INDEX_META]

            for sheet_name, section in self.sections.iteritems():
                # skip the default section, i.e. the survey name
                if sheet_name != self.survey_name:
                    xpath = section["xpath"]
                    columns = section["columns"]
                    # TODO: handle nested repeats - ignoring nested repeats
                    # for now since they will not be in the top-level record;
                    # perhaps nest sections as well so we can recurse in and
                    # get them
                    if record.has_key(xpath):
                        repeat_records = record[xpath]
                        num_repeat_records = len(repeat_records)
                        for repeat_record in repeat_records:
                            self._add_data_for_section(data[sheet_name],
                                repeat_record, columns, sheet_name,
                                parent_index, self.survey_name)

        return data

    def _add_data_for_section(self, data_section, record, columns, section_name,
                parent_index = -1, parent_table_name = None):
        data_section.append({})
        self.sections[section_name][self.CURRENT_INDEX_META] += 1
        index = self.sections[section_name][self.CURRENT_INDEX_META]
        # we could simply do data_section[len(data_section) - 1].update(record)
        # here, but we would end up with duplicate data from repeats

        # find any select multiple(s) and add additional columns to record
        record = self._split_select_multiples(record, self.select_multiples)
        # alt, precision
        self._split_gps_fields(record, self.gps_fields)
        for column in columns:
            data_value = None
            try:
                data_value = record[column]
            except KeyError:
                # a record may not have responses for some elements simply
                # because they were not captured
                pass
            data_section[len(data_section)-1].update({column: data_value})

        data_section[len(data_section)-1].update({
            XLSDataFrameBuilder.INDEX_COLUMN: index,
            XLSDataFrameBuilder.PARENT_INDEX_COLUMN: parent_index,
            XLSDataFrameBuilder.PARENT_TABLE_NAME_COLUMN: parent_table_name})

    def _generate_sections(self):
        """
        Split survey questions into separate sections for each xls sheet and
        columns for each section
        """
        # clear list
        self.sections = OrderedDict()
        self.survey_name, survey_xpath = survey_name_and_xpath_from_dd(self.dd)

        # generate a unique and valid xls sheet name
        self.survey_name = get_valid_sheet_name(self.survey_name,
                self.sections.keys())
        # setup the default section
        self._create_section(self.survey_name, survey_xpath, False)

        # dict of select multiple elements
        self.select_multiples = {}

        # get form elements to split repeats into separate section/sheets and
        # everything else in the default section
        for e in self.dd.get_survey_elements():
            # check for a Section or sub-classes of
            if isinstance(e, Section):
                # always default to the main sheet
                sheet_name = self.survey_name

                # if a repeat we use its name
                if isinstance(e, RepeatingSection):
                    sheet_name = e.name
                    sheet_name = get_valid_sheet_name(sheet_name,
                            self.sections.keys())
                    self._create_section(sheet_name, e.get_abbreviated_xpath(),
                            True)

                # for each child add to survey_sections
                for c in e.children:
                    if isinstance(c, Question) and not \
                            question_types_to_exclude(c.type) and not \
                            c.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE:
                        self._add_column_to_section(sheet_name, c)
                    elif c.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE:
                        self.select_multiples[c.get_abbreviated_xpath()] = [
                            option.get_abbreviated_xpath()
                            for option in c.children]
                        # if select multiple, get its choices and make them
                        # columns
                        for option in c.children:
                            self._add_column_to_section(sheet_name, option)
                    # split gps fields within this section
                    if c.bind.get(u"type") == GEOPOINT_BIND_TYPE:
                        # add columns for geopoint components
                        for xpath in self.dd.get_additional_geopoint_xpaths(
                                c.get_abbreviated_xpath()):
                            self._add_column_to_section(sheet_name, xpath)
        self.get_exceeds_xls_limits()

    def get_exceeds_xls_limits(self):
        if not hasattr(self, "exceeds_xls_limits"):
            self.exceeds_xls_limits = False
            if len(self.sections) > self.XLS_SHEET_COUNT_LIMIT:
                self.exceeds_xls_limits = True
            else:
                for section in self.sections.itervalues():
                    if len(section["columns"]) > self.XLS_COLUMN_COUNT_MAX:
                        self.exceeds_xls_limits = True
                        break
        return self.exceeds_xls_limits

    def _create_section(self, section_name, xpath, is_repeat):
        index = len(self.sections)
        self.sections[section_name] = {"name": section_name, "xpath": xpath,
                              "columns": [], "is_repeat": is_repeat,
                              self.CURRENT_INDEX_META: 0}

    def _add_column_to_section(self, sheet_name, column):
        section = self.sections[sheet_name]
        xpath = None
        if isinstance(column, SurveyElement):
            xpath = column.get_abbreviated_xpath()
        elif isinstance(column, basestring):
            xpath = column
        assert(xpath)
        # make sure column is not already in list
        if xpath not in section["columns"]:
            section["columns"].append(xpath)
Example #12
class XLSDataFrameBuilder(AbstractDataFrameBuilder):
    """
    Generate structures from mongo and DataDictionary for an XLSDataFrameWriter

    This builder can choose to query the data in batches and write to a single
    ExcelWriter object using multiple instances of XLSDataFrameWriter
    """
    INDEX_COLUMN = u"_index"
    PARENT_TABLE_NAME_COLUMN = u"_parent_table_name"
    PARENT_INDEX_COLUMN = u"_parent_index"
    EXTRA_COLUMNS = [
        INDEX_COLUMN, PARENT_TABLE_NAME_COLUMN, PARENT_INDEX_COLUMN
    ]
    SHEET_NAME_MAX_CHARS = 30
    XLS_SHEET_COUNT_LIMIT = 255
    XLS_COLUMN_COUNT_MAX = 255
    CURRENT_INDEX_META = 'current_index'

    def __init__(self,
                 username,
                 id_string,
                 filter_query=None,
                 group_delimiter=DEFAULT_GROUP_DELIMITER,
                 split_select_multiples=True):
        super(XLSDataFrameBuilder,
              self).__init__(username, id_string, filter_query,
                             group_delimiter, split_select_multiples)

    def _setup(self):
        super(XLSDataFrameBuilder, self)._setup()
        # need to split columns, with repeats in individual sheets and
        # everything else on the default sheet
        self._generate_sections()

    def export_to(self, file_path, batchsize=1000):
        self.xls_writer = ExcelWriter(file_path)

        # get record count
        record_count = self._query_mongo(count=True)

        # query in batches and for each batch create an XLSDataFrameWriter and
        # write to existing xls_writer object
        start = 0
        header = True
        while start < record_count:
            cursor = self._query_mongo(self.filter_query,
                                       start=start,
                                       limit=batchsize)

            data = self._format_for_dataframe(cursor)

            # write all cursor's data to their respective sheets
            for section_name, section in self.sections.iteritems():
                records = data[section_name]
                # TODO: currently ignoring nested repeats,
                # so skip sections that have 0 records
                if len(records) > 0:
                    # use a different group delimiter if needed
                    columns = section["columns"]
                    if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                        columns = [
                            self.group_delimiter.join(col.split("/"))
                            for col in columns
                        ]
                    columns = columns + self.EXTRA_COLUMNS
                    writer = XLSDataFrameWriter(records, columns)
                    writer.write_to_excel(self.xls_writer,
                                          section_name,
                                          header=header,
                                          index=False)
            header = False
            # increment counter(s)
            start += batchsize
            time.sleep(0.1)
        self.xls_writer.save()

    def _format_for_dataframe(self, cursor):
        """
        Format each record for consumption by a dataframe

        Returns a dictionary keyed by sheet name, where each value is a list
        of dicts to feed into a DataFrame
        """
        data = dict(
            (section_name, []) for section_name in self.sections.keys())

        main_section = self.sections[self.survey_name]
        main_sections_columns = main_section["columns"]

        for record in cursor:
            # from record, we'll end up with multiple records, one for each
            # section we have

            # add records for the default section
            self._add_data_for_section(data[self.survey_name], record,
                                       main_sections_columns, self.survey_name)
            parent_index = main_section[self.CURRENT_INDEX_META]

            for sheet_name, section in self.sections.iteritems():
                # skip the default section, i.e. the survey name
                if sheet_name != self.survey_name:
                    xpath = section["xpath"]
                    columns = section["columns"]
                    # TODO: handle nested repeats - ignoring nested repeats
                    # for now since they will not be in the top-level record;
                    # perhaps nest sections as well so we can recurse in and
                    # get them
                    if record.has_key(xpath):
                        repeat_records = record[xpath]
                        num_repeat_records = len(repeat_records)
                        for repeat_record in repeat_records:
                            self._add_data_for_section(data[sheet_name],
                                                       repeat_record, columns,
                                                       sheet_name,
                                                       parent_index,
                                                       self.survey_name)

        return data

    def _add_data_for_section(self,
                              data_section,
                              record,
                              columns,
                              section_name,
                              parent_index=-1,
                              parent_table_name=None):
        data_section.append({})
        self.sections[section_name][self.CURRENT_INDEX_META] += 1
        index = self.sections[section_name][self.CURRENT_INDEX_META]
        # we could simply do data_section[len(data_section) - 1].update(record)
        # here, but we would end up with duplicate data from repeats

        if self.split_select_multiples:
            # find any select multiple(s) and add additional columns to record
            record = self._split_select_multiples(record,
                                                  self.select_multiples)
        # alt, precision
        self._split_gps_fields(record, self.gps_fields)
        for column in columns:
            data_value = None
            try:
                data_value = record[column]
            except KeyError:
                # a record may not have responses for some elements simply
                # because they were not captured
                pass
            key = column
            if self.group_delimiter != DEFAULT_GROUP_DELIMITER:
                key = self.group_delimiter.join(column.split('/'))
            data_section[len(data_section) - 1].update({key: data_value})

        data_section[len(data_section) - 1].update({
            XLSDataFrameBuilder.INDEX_COLUMN: index,
            XLSDataFrameBuilder.PARENT_INDEX_COLUMN: parent_index,
            XLSDataFrameBuilder.PARENT_TABLE_NAME_COLUMN: parent_table_name
        })

        # add ADDITIONAL_COLUMNS
        data_section[len(data_section) - 1].update(
            dict([(column, record[column] if record.has_key(column) else None)
                  for column in self.ADDITIONAL_COLUMNS]))

    def _generate_sections(self):
        """
        Split survey questions into separate sections for each xls sheet and
        columns for each section
        """
        # clear list
        self.sections = OrderedDict()

        # dict of select multiple elements
        self.select_multiples = {}

        survey_element = self.dd.survey
        self.survey_name = get_valid_sheet_name(survey_element.name,
                                                self.sections.keys())
        self._create_section(self.survey_name,
                             survey_element.get_abbreviated_xpath(), False)
        # build sections
        self._build_sections_recursive(self.survey_name, self.dd.get_survey())

        for section_name in self.sections:
            self.sections[section_name]['columns'] += self.ADDITIONAL_COLUMNS
        self.get_exceeds_xls_limits()

    def _build_sections_recursive(self,
                                  section_name,
                                  element,
                                  is_repeating=False):
        """Builds a section's children and recurses any repeating sections
        to build those as a separate section
        """
        for child in element.children:
            # if a section, recurse
            if isinstance(child, Section):
                new_is_repeating = isinstance(child, RepeatingSection)
                new_section_name = section_name
                # if its repeating, build a new section
                if new_is_repeating:
                    new_section_name = get_valid_sheet_name(
                        child.name, self.sections.keys())
                    self._create_section(new_section_name,
                                         child.get_abbreviated_xpath(), True)

                self._build_sections_recursive(new_section_name, child,
                                               new_is_repeating)
            else:
                # add to survey_sections
                if isinstance(child, Question) and not \
                        question_types_to_exclude(child.type) and not \
                        child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE:
                    self._add_column_to_section(section_name, child)
                elif child.bind.get(u"type") == MULTIPLE_SELECT_BIND_TYPE:
                    self.select_multiples[child.get_abbreviated_xpath()] = [
                        option.get_abbreviated_xpath()
                        for option in child.children]
                    # if select multiple, get its choices and make them
                    # columns
                    if self.split_select_multiples:
                        for option in child.children:
                            self._add_column_to_section(section_name, option)
                    else:
                        self._add_column_to_section(section_name, child)

                # split gps fields within this section
                if child.bind.get(u"type") == GEOPOINT_BIND_TYPE:
                    # add columns for geopoint components
                    for xpath in self.dd.get_additional_geopoint_xpaths(
                            child.get_abbreviated_xpath()):
                        self._add_column_to_section(section_name, xpath)

    def get_exceeds_xls_limits(self):
        if not hasattr(self, "exceeds_xls_limits"):
            self.exceeds_xls_limits = False
            if len(self.sections) > self.XLS_SHEET_COUNT_LIMIT:
                self.exceeds_xls_limits = True
            else:
                for section in self.sections.itervalues():
                    if len(section["columns"]) > self.XLS_COLUMN_COUNT_MAX:
                        self.exceeds_xls_limits = True
                        break
        return self.exceeds_xls_limits

    def _create_section(self, section_name, xpath, is_repeat):
        index = len(self.sections)
        self.sections[section_name] = {
            "name": section_name,
            "xpath": xpath,
            "columns": [],
            "is_repeat": is_repeat,
            self.CURRENT_INDEX_META: 0
        }

    def _add_column_to_section(self, sheet_name, column):
        section = self.sections[sheet_name]
        xpath = None
        if isinstance(column, SurveyElement):
            xpath = column.get_abbreviated_xpath()
        elif isinstance(column, basestring):
            xpath = column
        assert (xpath)
        # make sure column is not already in list
        if xpath not in section["columns"]:
            section["columns"].append(xpath)
Example #14
                temp_frame2 = temp_frame2.dropna()

                if month == 0:
                    final_frame = final_frame.join(temp_frame2, how='right')
                else:
                    final_frame = pd.concat([final_frame, temp_frame2])

            except:
                pass

    print 'Just finished ticker %s of %s' % (ticker, num_tickers)

today = str(
    str(dt.datetime.now().month) + str(dt.datetime.now().day) +
    str(dt.datetime.now().year))

file_name = 'NASDAQ_covered_call' + today
xlsx = '.xlsx'
csv = '.csv'
name_xl = file_name + xlsx
name_cs = file_name + csv
writer = ExcelWriter(name_xl)
final_frame.to_excel(writer, sheet_name='Covered Call')
writer.save()

final_frame.to_csv(name_cs)
end_time = time()
elapsed_time = end_time - start_time
print elapsed_time