Ejemplo n.º 1
0
 def to_dict(self):
     """
     Build an ordered mapping of sheet name to sheet payload

     Iterates over this book and, for every sheet it yields, stores
     ``sheet.payload`` under ``sheet.name``. Iteration order is kept.

     :returns: an ``OrderedDict`` keyed by sheet name
     """
     return OrderedDict((sheet.name, sheet.payload) for sheet in self)
Ejemplo n.º 2
0
def merge_readers(reader_array, outfilename=DEFAULT_OUT_FILE):
    """Combine the content of several readers into one output file

    Each reader's ``dict`` attribute is folded, in the order given, into
    a single ordered dictionary which is then handed to ``save_as``.

    :param reader_array: an iterable of readers exposing a ``dict``
                         attribute
    :param str outfilename: save the merged content as this file
    :raises NotImplementedError: if ``outfilename`` already exists
    """
    # refuse to overwrite an existing file
    if os.path.exists(outfilename):
        raise NotImplementedError(MESSAGE_WARNING)
    merged = OrderedDict()
    for each_reader in reader_array:
        merged.update(each_reader.dict)
    save_as(dest_file_name=outfilename, adict=merged)
Ejemplo n.º 3
0
    def render_book_to_stream(self,
                              file_stream,
                              book,
                              inits=None,
                              mapdicts=None,
                              **keywords):
        """Import every sheet of a book into database tables

        :param file_stream: a pair of ``(session, tables)``
        :param book: the book to import. A ``BookStream`` is converted
                     with ``to_book`` first and the converted book is
                     used for every later step.
        :param inits: row initializers, one per table. When None, a list
                      of None as long as ``tables`` is used.
        :param mapdicts: column name mappings, one per table. When None,
                         a list of None as long as ``tables`` is used.
        :param keywords: additional parameters passed on to ``save_data``
        """
        from pyexcel.book import to_book
        session, tables = file_stream
        thebook = book
        if isinstance(book, BookStream):
            thebook = to_book(book)
        initializers = inits
        if initializers is None:
            initializers = [None] * len(tables)
        if mapdicts is None:
            mapdicts = [None] * len(tables)
        for sheet in thebook:
            if len(sheet.colnames) == 0:
                sheet.name_columns_by_row(0)
        # read headers from the converted book: the sheets that were
        # just given column names live in thebook, not in the raw input
        colnames_array = [sheet.colnames for sheet in thebook]
        scattered = zip(tables, colnames_array, mapdicts, initializers)

        importer = sql.SQLTableImporter(session)
        for table, column_names, mapdict, initializer in scattered:
            adapter = sql.SQLTableImportAdapter(table)
            adapter.column_names = column_names
            adapter.column_name_mapping_dict = mapdict
            adapter.row_initializer = initializer
            importer.append(adapter)
        to_store = OrderedDict()
        for sheet_name in thebook.sheet_names():
            # due book.to_dict() brings in column_names
            # which corrupts the data
            # look the sheet up by name on thebook, the same object
            # that supplied sheet_names()
            to_store[sheet_name] = thebook[sheet_name].get_internal_array()
        save_data(importer, to_store, file_type=self._file_type, **keywords)
Ejemplo n.º 4
0
    def render_book_to_stream(
        self, file_stream, book, inits=None, mapdicts=None, **keywords
    ):
        """Import every sheet of a book into database tables

        :param file_stream: a pair of ``(session, tables)``
        :param book: the book whose sheets are imported
        :param inits: row initializers, one per table. When None, a list
                      of None as long as ``tables`` is used.
        :param mapdicts: column name mappings, one per table. When None,
                         a list of None as long as ``tables`` is used.
        :param keywords: additional parameters passed on to ``save_data``
        """
        session, tables = file_stream
        headers = common.get_book_headers_in_array(book)
        row_initializers = inits
        if row_initializers is None:
            row_initializers = [None] * len(tables)
        name_mappings = mapdicts
        if name_mappings is None:
            name_mappings = [None] * len(tables)
        scattered = zip(tables, headers, name_mappings, row_initializers)

        # one import adapter per (table, headers, mapping, initializer)
        importer = sql.SQLTableImporter(session)
        for table, column_names, mapping, initializer in scattered:
            adapter = sql.SQLTableImportAdapter(table)
            adapter.column_names = column_names
            adapter.column_name_mapping_dict = mapping
            adapter.row_initializer = initializer
            importer.append(adapter)

        # book.to_dict() is avoided here: it brings in column_names,
        # which corrupts the data
        to_store = OrderedDict()
        for each_sheet in book:
            to_store[each_sheet.name] = each_sheet.get_internal_array()
        save_data(importer, to_store, file_type=self._file_type, **keywords)
Ejemplo n.º 5
0
    def project(self, new_ordered_columns, exclusion=False):
        """
        Build a new sheet from a selection of this sheet's columns.

        With ``exclusion`` left False, the new sheet holds the columns
        named in ``new_ordered_columns``, in that order. With
        ``exclusion`` set to True, it holds every column that is *not*
        named there, in this sheet's own order.

        Example:

           >>> sheet = Sheet(
           ... [["A", "B", "C"], [1, 2, 3], [11, 22, 33], [111, 222, 333]],
           ... name_columns_by_row=0)
           >>> sheet.project(["B", "A", "C"])
           pyexcel sheet:
           +-----+-----+-----+
           |  B  |  A  |  C  |
           +=====+=====+=====+
           | 2   | 1   | 3   |
           +-----+-----+-----+
           | 22  | 11  | 33  |
           +-----+-----+-----+
           | 222 | 111 | 333 |
           +-----+-----+-----+
           >>> sheet.project(["B", "C"])
           pyexcel sheet:
           +-----+-----+
           |  B  |  C  |
           +=====+=====+
           | 2   | 3   |
           +-----+-----+
           | 22  | 33  |
           +-----+-----+
           | 222 | 333 |
           +-----+-----+
           >>> sheet.project(["B", "C"], exclusion=True)
           pyexcel sheet:
           +-----+
           |  A  |
           +=====+
           | 1   |
           +-----+
           | 11  |
           +-----+
           | 111 |
           +-----+

        """
        from pyexcel import get_array

        columns = self.to_dict()
        if exclusion:
            # keep whatever was not listed, in the sheet's own order
            kept_names = [
                name for name in columns.keys()
                if name not in new_ordered_columns
            ]
        else:
            kept_names = new_ordered_columns

        selected = OrderedDict()
        for name in kept_names:
            selected[name] = columns[name]

        return Sheet(
            get_array(adict=selected), name=self.name, name_columns_by_row=0
        )
Ejemplo n.º 6
0
def _convert_dict_to_ordered_dict(the_dict):
    keys = the_dict.keys()
    if not PY2:
        keys = list(keys)
    keys = sorted(keys)
    ret = OrderedDict()
    for key in keys:
        ret[key] = the_dict[key]
    return ret
Ejemplo n.º 7
0
    def load_from_sheets(self, sheets):
        """Replace the book content with the given sheets

        Does nothing when ``sheets`` is None. Otherwise every entry is
        wrapped in a ``SheetStream`` and stored under its name, both in
        ``self.sheets`` and as an attribute of this object.

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        if sheets is None:
            return
        self.sheets = OrderedDict()
        if isinstance(sheets, OrderedDict):
            # the caller chose an order: keep it
            ordered_names = sheets.keys()
        else:
            # no order was requested: fall back to alphabetical order
            ordered_names = sorted(sheets.keys())
        for sheet_name in ordered_names:
            stream = SheetStream(sheet_name, sheets[sheet_name])
            # self.sheets remembers the sheet order
            self.sheets.update({sheet_name: stream})
            # attribute access by sheet name is offered as a convenience
            self.__dict__[sheet_name] = stream
        self.name_array = list(self.sheets.keys())
Ejemplo n.º 8
0
def to_dict(an_object):
    """Collect the rows of an iterable into an ordered dictionary

    A plain ``dict`` row is merged in as is, a ``Sheet`` row is stored
    as its array under the sheet name, and any other row is stored
    under a generated key: ``Series_1``, ``Series_2`` and so on.
    """
    collected = OrderedDict()
    series_seen = 0
    for item in an_object:
        if type(item) is dict:
            collected.update(item)
            continue
        if isinstance(item, Sheet):
            collected.update({item.name: item.to_array()})
            continue
        # anything else gets the next series number as its key
        series_seen += 1
        collected.update({"Series_%d" % series_seen: item})
    return collected
Ejemplo n.º 9
0
    def load_from_sheets(self, sheets):
        """Replace the book content with the given sheets

        Does nothing when ``sheets`` is None. Otherwise every entry is
        wrapped in a ``SheetStream`` and stored under its name, both in
        ``self.sheets`` and as an attribute of this object.

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        if sheets is None:
            return
        self.sheets = OrderedDict()
        if isinstance(sheets, OrderedDict):
            # the caller chose an order: keep it
            ordered_names = sheets.keys()
        else:
            # no order was requested: fall back to alphabetical order
            ordered_names = sorted(sheets.keys())
        for sheet_name in ordered_names:
            stream = SheetStream(sheet_name, sheets[sheet_name])
            # self.sheets remembers the sheet order
            self.sheets.update({sheet_name: stream})
            # attribute access by sheet name is offered as a convenience
            self.__dict__[sheet_name] = stream
        self.name_array = list(self.sheets.keys())
Ejemplo n.º 10
0
    def render_book_to_stream(self,
                              models,
                              thebook,
                              inits=None,
                              mapdicts=None,
                              batch_size=None,
                              **keywords):
        """Import every sheet of a book into django models

        :param models: a list of django models; None entries are skipped
        :param thebook: the book to import. A ``BookStream`` is converted
                        with ``to_book`` first.
        :param inits: row initializers, one per model. When None, a list
                      of None as long as the non-None models is used.
        :param mapdicts: column name mappings, one per model. When None,
                         a list of None as long as the non-None models
                         is used.
        :param batch_size: passed on to ``save_data``
        :param keywords: additional parameters passed on to ``save_data``
        """
        from pyexcel.book import to_book
        if isinstance(thebook, BookStream):
            book = to_book(thebook)
        else:
            book = thebook
        usable_models = [each for each in models if each is not None]
        row_initializers = inits
        if row_initializers is None:
            row_initializers = [None] * len(usable_models)
        name_mappings = mapdicts
        if name_mappings is None:
            name_mappings = [None] * len(usable_models)
        # sheets without column names take them from their first row
        for sheet in book:
            if len(sheet.colnames) == 0:
                sheet.name_columns_by_row(0)
        headers = []
        for sheet in book:
            headers.append(sheet.colnames)
        scattered = zip(usable_models, headers, name_mappings,
                        row_initializers)

        importer = django.DjangoModelImporter()
        for model, column_names, mapping, initializer in scattered:
            adapter = django.DjangoModelImportAdapter(model)
            adapter.column_names = column_names
            adapter.column_name_mapping_dict = mapping
            adapter.row_initializer = initializer
            importer.append(adapter)

        # book.to_dict() is avoided here: it brings in column_names,
        # which corrupts the data
        to_store = OrderedDict()
        for sheet_name in book.sheet_names():
            to_store[sheet_name] = book[sheet_name].get_internal_array()
        save_data(importer,
                  to_store,
                  file_type=self._file_type,
                  batch_size=batch_size,
                  **keywords)
Ejemplo n.º 11
0
    def render_book_to_stream(
        self,
        models,
        book,
        inits=None,
        mapdicts=None,
        batch_size=None,
        **keywords
    ):
        """Import every sheet of a book into django models

        :param models: a list of django models; None entries are skipped
        :param book: the book whose sheets are imported
        :param inits: row initializers, one per model. When None, a list
                      of None as long as the non-None models is used.
        :param mapdicts: column name mappings, one per model. When None,
                         a list of None as long as the non-None models
                         is used.
        :param batch_size: passed on to ``save_data``
        :param keywords: additional parameters passed on to ``save_data``
        """
        headers = common.get_book_headers_in_array(book)
        usable_models = [each for each in models if each is not None]
        row_initializers = inits
        if row_initializers is None:
            row_initializers = [None] * len(usable_models)
        name_mappings = mapdicts
        if name_mappings is None:
            name_mappings = [None] * len(usable_models)
        scattered = zip(
            usable_models, headers, name_mappings, row_initializers
        )

        # one import adapter per (model, headers, mapping, initializer)
        importer = django.DjangoModelImporter()
        for model, column_names, mapping, initializer in scattered:
            adapter = django.DjangoModelImportAdapter(model)
            adapter.column_names = column_names
            adapter.column_name_mapping_dict = mapping
            adapter.row_initializer = initializer
            importer.append(adapter)

        # book.to_dict() is avoided here: it brings in column_names,
        # which corrupts the data
        to_store = OrderedDict()
        for each_sheet in book:
            to_store[each_sheet.name] = each_sheet.get_internal_array()
        save_data(
            importer,
            to_store,
            file_type=self._file_type,
            batch_size=batch_size,
            **keywords
        )
Ejemplo n.º 12
0
class BookStream(object):
    """
    Memory efficient book representation

    Unlike :class:`pyexcel.Book`, an instance of this class keeps its
    sheets as :class:`pyexcel.generators.SheetStream` objects. Because
    `SheetStream` does not read data into memory, it is memory efficient.
    """
    def __init__(self, sheets=None, filename="memory", path=None):
        """Book constructor

        :param OrderedDict/dict sheets: a dictionary of data
        :param str filename: the physical file
        :param str path: the relative path or absolute path
        """
        self.path = path
        self.filename = filename
        self.name_array = []
        if not sheets:
            # nothing to load: start with an empty sheet mapping
            self.sheets = {}
        else:
            self.load_from_sheets(sheets)

    def load_from_sheets(self, sheets):
        """Replace the book content with the given sheets

        Does nothing when ``sheets`` is None.

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        if sheets is None:
            return
        self.sheets = OrderedDict()
        if isinstance(sheets, OrderedDict):
            # the caller chose an order: keep it
            ordered_names = sheets.keys()
        else:
            # no order was requested: fall back to alphabetical order
            ordered_names = sorted(sheets.keys())
        for sheet_name in ordered_names:
            stream = SheetStream(sheet_name, sheets[sheet_name])
            # self.sheets remembers the sheet order
            self.sheets.update({sheet_name: stream})
            # attribute access by sheet name is offered as a convenience
            self.__dict__[sheet_name] = stream
        self.name_array = list(self.sheets.keys())

    def to_dict(self):
        """
        Return an ordered dictionary of sheet name to sheet payload
        """
        return OrderedDict((sheet.name, sheet.payload) for sheet in self)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Return how many sheet names this book holds"""
        return len(self.name_array)

    def __getitem__(self, index):
        """Return the sheet at ``index``, or None when it is too large"""
        if not (index < len(self.name_array)):
            return None
        return self.sheets[self.name_array[index]]
Ejemplo n.º 13
0
class BookStream(object):
    """
    Memory efficient book representation

    Unlike :class:`pyexcel.Book`, an instance of this class keeps its
    sheets as :class:`pyexcel.generators.SheetStream` objects. Because
    `SheetStream` does not read data into memory, it is memory efficient.
    """
    def __init__(self, sheets=None, filename="memory", path=None):
        """Book constructor

        :param OrderedDict/dict sheets: a dictionary of data
        :param str filename: the physical file
        :param str path: the relative path or absolute path
        """
        self.path = path
        self.filename = filename
        self.name_array = []
        if not sheets:
            # nothing to load: start with an empty sheet mapping
            self.sheets = {}
        else:
            self.load_from_sheets(sheets)

    def load_from_sheets(self, sheets):
        """Replace the book content with the given sheets

        Does nothing when ``sheets`` is None.

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        if sheets is None:
            return
        self.sheets = OrderedDict()
        if isinstance(sheets, OrderedDict):
            # the caller chose an order: keep it
            ordered_names = sheets.keys()
        else:
            # no order was requested: fall back to alphabetical order
            ordered_names = sorted(sheets.keys())
        for sheet_name in ordered_names:
            stream = SheetStream(sheet_name, sheets[sheet_name])
            # self.sheets remembers the sheet order
            self.sheets.update({sheet_name: stream})
            # attribute access by sheet name is offered as a convenience
            self.__dict__[sheet_name] = stream
        self.name_array = list(self.sheets.keys())

    def to_dict(self):
        """
        Return an ordered dictionary of sheet name to sheet payload
        """
        return OrderedDict((sheet.name, sheet.payload) for sheet in self)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Return how many sheet names this book holds"""
        return len(self.name_array)

    def __getitem__(self, index):
        """Return the sheet at ``index``, or None when it is too large"""
        if not (index < len(self.name_array)):
            return None
        return self.sheets[self.name_array[index]]
Ejemplo n.º 14
0
def make_spreadsheet(full_orders,
                     buys_sorted,
                     sells_sorted,
                     output_dir='output',
                     year=2017):
    """Write the transactions, form 8949 rows and totals to an .ods file

    The file is named ``Transactions_Crypto_<year>.ods`` inside
    ``output_dir`` and holds three sheets: "Transactions", "8949" and
    "Calculated".

    :param full_orders: rows for the "8949" sheet. Index 2 is read as the
                        sale date in ``%m/%d/%Y`` format, index 3 as the
                        proceeds and index 5 as the gain/loss.
    :param buys_sorted: list of buy orders, each a mapping with the keys
                        written to the "Transactions" sheet
    :param sells_sorted: list of sell orders, same layout as the buys
    :param output_dir: directory the file is written to
    :param year: the year used in the file name
    """
    base_name = '_'.join(['Transactions', 'Crypto', str(year)])
    out_file = os.path.join(output_dir, base_name) + '.ods'

    # "Transactions" sheet: header first, then all orders by order time
    trans_rows = [[
        'Order Time UTC', 'product', 'currency', 'currency_pair', 'buysell',
        'cost', 'amount', 'cost_per_coin'
    ]]
    copied_keys = ('product', 'currency', 'currency_pair', 'buysell',
                   'cost', 'amount', 'cost_per_coin')
    chronological = sorted(buys_sorted + sells_sorted,
                           key=lambda entry: entry['order_time'])
    for entry in chronological:
        trans_rows.append([entry['order_time'].isoformat()] +
                          [entry[key] for key in copied_keys])

    # "8949" sheet: header first, then the full orders as given
    form_rows = [[
        'Description', 'Date bought', 'Date sold', 'Proceeds', 'Cost basis',
        'Gain/Loss'
    ]]
    # A full order is [description, date acquired, date sold, proceeds,
    # cost basis, gain/loss]; the original note says it is populated in
    # fill_8949.py. Only these three positions are read here.
    sold_col, proceeds_col, gain_col = 2, 3, 5

    monthly = {}
    for month in range(1, 12 + 1):
        monthly[month] = {
            'first_idx': -1,
            'last_idx': -1,
            'proceeds': 0,
            'gain_loss': 0
        }
    grand_proceeds = 0
    grand_gain_loss = 0

    for position, record in enumerate(full_orders):
        form_rows.append(record)
        # track the first and last position of each month's sales
        sold_on = datetime.datetime.strptime(record[sold_col], '%m/%d/%Y')
        bucket = monthly[sold_on.month]
        if bucket['first_idx'] == -1:
            bucket['first_idx'] = position
        bucket['last_idx'] = position

        bucket['proceeds'] += record[proceeds_col]
        grand_proceeds += record[proceeds_col]
        bucket['gain_loss'] += record[gain_col]
        grand_gain_loss += record[gain_col]

    # "Calculated" sheet: each row is label, formula, precomputed value
    last_row = len(form_rows)
    calc_rows = [
        ['Total 8949 gain/loss:',
         "=SUM($'8949'.F2:F%d)" % last_row, grand_gain_loss],
        ['Total 8949 proceeds:',
         "=SUM($'8949'.D2:D%d)" % last_row, grand_proceeds],
    ]
    for month in range(1, 12 + 1):
        bucket = monthly[month]
        if bucket['first_idx'] == -1:
            # no sale recorded in this month
            continue
        # +2: positions are 0-based and row 1 of the sheet is the header.
        # NOTE(review): the range matches the month's proceeds only if
        # that month's sales are contiguous in full_orders - confirm.
        calc_rows.append([
            'Total 8949 proceeds in month #%d:' % month,
            "=SUM($'8949'.D%d:D%d)" %
            (bucket['first_idx'] + 2, bucket['last_idx'] + 2),
            bucket['proceeds']
        ])

    book = OrderedDict()
    book['Transactions'] = trans_rows
    book['8949'] = form_rows
    book['Calculated'] = calc_rows

    p.save_book_as(bookdict=book, dest_file_name=out_file)
    print("Saved spreadsheet as %s" % (out_file))
Ejemplo n.º 15
0
class Book(with_metaclass(BookMeta, object)):
    """An excel book holding one or more named sheets

    For csv file, there will be just one sheet
    """
    def __init__(self, sheets=None, filename="memory", path=None):
        """Book constructor

        :param OrderedDict/dict sheets: a dictionary of data
        :param str filename: the physical file
        :param str path: the relative path or absolute path
        """
        self.init(sheets=sheets, filename=filename, path=path)

    def init(self, sheets=None, filename="memory", path=None):
        """Set the book attributes and load ``sheets``"""
        self.path = path
        self.filename = filename
        self.name_array = []
        self.load_from_sheets(sheets)

    def load_from_sheets(self, sheets):
        """Replace the book content with the given sheets

        The current content is always dropped; with ``sheets`` set to
        None the book is simply left empty.

        :param dict sheets: a dictionary of sheets. Each sheet is
        a list of lists
        """
        self.sheets = OrderedDict()
        if sheets is None:
            return
        if isinstance(sheets, OrderedDict):
            # the caller chose an order: keep it
            ordered_names = sheets.keys()
        else:
            # no order was requested: fall back to alphabetical order
            ordered_names = sorted(sheets.keys())
        for sheet_name in ordered_names:
            new_sheet = self.get_sheet(sheets[sheet_name], sheet_name)
            # self.sheets remembers the sheet order
            self.sheets.update({sheet_name: new_sheet})
            # attribute access by sheet name is offered as a convenience
            self.__dict__[sheet_name] = new_sheet
        self.name_array = list(self.sheets.keys())

    def get_sheet(self, array, name):
        """Wrap a list of lists in a ``Sheet`` with the given name"""
        return Sheet(array, name)

    def __iter__(self):
        return SheetIterator(self)

    def number_of_sheets(self):
        """Return how many sheet names this book holds"""
        return len(self.name_array)

    def sheet_names(self):
        """Return the list of sheet names"""
        return self.name_array

    def sheet_by_name(self, name):
        """Return the sheet stored under ``name``"""
        return self.sheets[name]

    def sheet_by_index(self, index):
        """Return the sheet at ``index``, or None when it is too large"""
        if not (index < len(self.name_array)):
            return None
        return self.sheets[self.name_array[index]]

    def remove_sheet(self, sheet):
        """Remove a sheet, given either its index or its name

        :raises IndexError: for an integer that is too large
        :raises KeyError: for a string that is not a sheet name
        :raises TypeError: for any other kind of argument
        """
        if isinstance(sheet, int):
            if not (sheet < len(self.name_array)):
                raise IndexError
            doomed = self.name_array[sheet]
        elif isinstance(sheet, str):
            if sheet not in self.name_array:
                raise KeyError
            doomed = sheet
        else:
            raise TypeError
        del self.sheets[doomed]
        self.name_array = list(self.sheets.keys())

    def __getitem__(self, key):
        """Override operator[]: an int is an index, anything else a name"""
        if isinstance(key, int):
            return self.sheet_by_index(key)
        return self.sheet_by_name(key)

    def __delitem__(self, other):
        """Override del book[index]"""
        self.remove_sheet(other)
        return self

    def __add__(self, other):
        """Override operator +

        Returns a new book; neither operand is changed.

        example::

            book3 = book1 + book2
            book3 = book1 + book2["Sheet 1"]

        :raises TypeError: when ``other`` is neither a Book nor a Sheet
        """
        content = {}
        own_sheets = utils.to_dict(self)
        # a lone sheet is prefixed with the file name of its book
        own_is_single = len(own_sheets.keys()) == 1
        for own_name in own_sheets.keys():
            if own_is_single:
                target = "%s_%s" % (self.filename, own_name)
            else:
                target = own_name
            content[target] = own_sheets[own_name]
        if isinstance(other, Book):
            their_sheets = utils.to_dict(other)
            their_is_single = len(their_sheets.keys()) == 1
            for their_name in their_sheets.keys():
                target = their_name
                if their_is_single:
                    # a lone sheet takes the file name of its book
                    target = other.filename
                if target in content:
                    # name clash: add a unique suffix to the sheet name
                    uid = utils.local_uuid()
                    target = "%s_%s" % (their_name, uid)
                content[target] = their_sheets[their_name]
        elif isinstance(other, Sheet):
            target = other.name
            if target in content:
                uid = utils.local_uuid()
                target = "%s_%s" % (other.name, uid)
            content[target] = other.to_array()
        else:
            raise TypeError
        output = Book()
        output.load_from_sheets(content)
        return output

    def __iadd__(self, other):
        """Operator overloading +=

        Adds the sheets of ``other`` to this book in place.

        example::

            book += book2
            book += book2["Sheet1"]

        :raises TypeError: when ``other`` is neither a Book nor a Sheet
        """
        if isinstance(other, Book):
            incoming = other.sheet_names()
            incoming_is_single = len(incoming) == 1
            for incoming_name in incoming:
                target = incoming_name
                if incoming_is_single:
                    # a lone sheet takes the file name of its book
                    target = other.filename
                if target in self.name_array:
                    # name clash: add a unique suffix to the sheet name
                    uid = utils.local_uuid()
                    target = "%s_%s" % (incoming_name, uid)
                self.sheets[target] = self.get_sheet(
                    other[incoming_name].to_array(), target)
        elif isinstance(other, Sheet):
            target = other.name
            if target in self.name_array:
                uid = utils.local_uuid()
                target = "%s_%s" % (other.name, uid)
            self.sheets[target] = self.get_sheet(other.to_array(), target)
        else:
            raise TypeError
        self.name_array = list(self.sheets.keys())
        return self

    def to_dict(self):
        """Return the book content as a dictionary"""
        from .utils import to_dict
        return to_dict(self)

    def __repr__(self):
        return self.texttable

    def __str__(self):
        return self.texttable

    def save_as(self, filename):
        """Save the content to a new file

        :param str filename: a file path
        """
        return save_book(self, file_name=filename)

    def save_to_memory(self, file_type, stream=None, **keywords):
        """Save the content to a memory stream

        The work is done by this object's ``get_<file_type>`` method.

        :param file_type: what format the stream is in
        :param stream: a memory stream.  Note in Python 3, for csv and tsv
                       format, please pass an instance of StringIO. For xls,
                       xlsx, and ods, an instance of BytesIO.
        :param keywords: additional parameters passed on to that method
        """
        writer = getattr(self, "get_%s" % file_type)
        return writer(file_stream=stream, **keywords)

    def save_to_django_models(self, models,
                              initializers=None, mapdicts=None,
                              batch_size=None):
        """Save to database table through django model

        :param models: a list of database models, that is accepted by
                       :meth:`Sheet.save_to_django_model`. The sequence
                       of tables matters when there is dependencies in
                       between the tables. For example, **Car** is made
                       by **Car Maker**. **Car Maker** table should be
                       specified before **Car** table.
        :param initializers: a list of initialization functions for your
                             tables and the sequence should match tables,
        :param mapdicts: custom map dictionary for your data columns
                         and the sequence should match tables
        :param batch_size: passed on to ``save_book``
        """
        save_book(self,
                  models=models,
                  initializers=initializers,
                  mapdicts=mapdicts,
                  batch_size=batch_size)

    def save_to_database(self, session, tables,
                         initializers=None, mapdicts=None,
                         auto_commit=True):
        """Save data in sheets to database tables

        :param session: database session
        :param tables: a list of database tables, that is accepted by
                       :meth:`Sheet.save_to_database`. The sequence of tables
                       matters when there is dependencies in between the
                       tables. For example, **Car** is made by **Car Maker**.
                       **Car Maker** table should
                       be specified before **Car** table.
        :param initializers: a list of initialization functions for your
                             tables and the sequence should match tables,
        :param mapdicts: custom map dictionary for your data columns
                         and the sequence should match tables
        :param auto_commit: by default, data is committed.

        """
        save_book(self,
                  session=session,
                  tables=tables,
                  initializers=initializers,
                  mapdicts=mapdicts,
                  auto_commit=auto_commit)