def import_data(filename):
    """Import tab-separated translation records from ``filename``.

    Every line must contain exactly four tab-separated columns: source
    language, target language, a timestamp (handed to ``str2datetime``) and
    a JSON document. Lines with any other column count are echoed to stderr
    and skipped. Each well-formed line becomes a ``Translation`` that is
    committed on its own; if the commit raises ``IntegrityError`` the session
    is rolled back and the import moves on to the next line.

    :param filename: path of the tab-separated file to read
    """
    # A context manager guarantees the handle is closed, even when a row
    # fails to parse and an exception escapes the loop.
    with open(filename) as lines:
        for line_number, line in enumerate(lines, start=1):
            cols = line.split('\t')

            if len(cols) != 4:
                sys.stderr.write('{}\n'.format(line))
                continue

            timestamp = unix_time(str2datetime(cols[2]))

            # hashlib only accepts bytes. Encode text explicitly so the digest
            # can be computed on Python 3, while leaving byte strings (the
            # Python 2 ``str`` type) untouched.
            payload = cols[3].strip()
            if not isinstance(payload, bytes):
                payload = payload.encode('utf-8')

            translation = Translation(
                id=uuid64.issue(timestamp),
                source_lang=cols[0].strip(),
                target_lang=cols[1].strip(),
                hash=hashlib.sha1(payload).hexdigest(),
                data=json.loads(cols[3])
            )

            print('{}: Processing data ({}, {}, {})'.format(
                line_number,
                translation.source_lang,
                translation.target_lang, timestamp))
            try:
                db.session.add(translation)
                db.session.commit()
            except IntegrityError:
                # Presumably a duplicate record; drop it and keep importing.
                db.session.rollback()
Beispiel #2
0
def import_data(filename):
    """Load translation rows from a tab-separated file into the database.

    A valid line has exactly four tab-separated fields: source language,
    target language, a timestamp (parsed by ``str2datetime``) and a JSON
    document. Any other line is written to stderr and skipped. Each valid
    line is stored as a ``Translation`` and committed individually; an
    ``IntegrityError`` on commit rolls the session back and processing
    continues with the following line.

    :param filename: path of the tab-separated file to read
    """
    # ``with`` closes the file deterministically instead of leaving the
    # handle to the garbage collector.
    with open(filename) as source:
        for line_number, line in enumerate(source, start=1):
            cols = line.split('\t')

            if len(cols) != 4:
                sys.stderr.write('{}\n'.format(line))
                continue

            timestamp = unix_time(str2datetime(cols[2]))

            # sha1() requires bytes; encode text (Python 3) and pass byte
            # strings (Python 2 ``str``) through unchanged.
            raw_json = cols[3].strip()
            if not isinstance(raw_json, bytes):
                raw_json = raw_json.encode('utf-8')

            translation = Translation(id=uuid64.issue(timestamp),
                                      source_lang=cols[0].strip(),
                                      target_lang=cols[1].strip(),
                                      hash=hashlib.sha1(raw_json).hexdigest(),
                                      data=json.loads(cols[3]))

            print('{}: Processing data ({}, {}, {})'.format(
                line_number, translation.source_lang,
                translation.target_lang, timestamp))
            try:
                db.session.add(translation)
                db.session.commit()
            except IntegrityError:
                # Presumably a duplicate record; discard it and carry on.
                db.session.rollback()
Beispiel #3
0
def store_phrases(source_lang, target_lang, observed_at, phrases):
    """Print SQL that records every (source, target) phrase pair.

    For each target text belonging to a source text, two statements are
    written to stdout: an ``INSERT`` of a new ``phrase`` row with a count of
    0, followed by an ``UPDATE`` that increments the count of all rows
    matching the same languages and texts. Single quotes in the two texts
    are doubled; the language codes are inserted as given.

    :type observed_at: datetime
    :param phrases: iterable of ``(source_text, target_texts)`` pairs
    """
    insert_template = (
        "INSERT INTO phrase (id, observed_at, source_lang, "
        "target_lang, source_text, target_text, count) VALUES("
        "'{}', '{}', '{}', '{}', '{}', '{}', '0');"
    )
    update_template = (
        "UPDATE phrase SET count = count + 1 WHERE "
        "source_lang = '{}' AND "
        "target_lang = '{}' AND "
        "source_text = '{}' AND "
        "target_text = '{}';"
    )

    for source_text, target_texts in phrases:
        for target_text in target_texts:
            # A fresh id is issued for every emitted INSERT.
            record_id = uuid64.issue()
            quoted_source = source_text.replace("'", "''")
            quoted_target = target_text.replace("'", "''")

            insert_values = (
                record_id,
                observed_at,
                source_lang,
                target_lang,
                quoted_source,
                quoted_target,
            )
            print(insert_template.format(*insert_values))

            update_values = (
                source_lang,
                target_lang,
                quoted_source,
                quoted_target,
            )
            print(update_template.format(*update_values))
Beispiel #4
0
    def create(cls, commit=True, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save the new instance.

        :param commit: forwarded to ``instance.save``
        :param kwargs: constructor arguments for ``cls``; an ``id`` is
            generated with ``uuid64.issue()`` when none is supplied
        :return: whatever ``instance.save(commit=commit)`` returns
        """
        # Respect an id chosen by the caller. Overwriting it unconditionally
        # silently discarded the caller's value.
        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        # Stamp the instance only when it has a timestamp attribute that
        # was left unset.
        if hasattr(instance, 'timestamp') and instance.timestamp is None:
            instance.timestamp = datetime.utcnow()

        return instance.save(commit=commit)
Beispiel #5
0
    def create(cls, commit=True, **kwargs):
        """Create an instance of ``cls`` from ``kwargs`` and save it.

        :param commit: passed through to ``instance.save``
        :param kwargs: constructor arguments for ``cls``; when no ``id`` is
            given, one is issued by ``uuid64.issue()``
        :return: the result of ``instance.save(commit=commit)``
        """
        # Issue an id only if the caller did not provide one, so that an
        # explicitly requested id is no longer clobbered.
        kwargs.setdefault('id', None)
        if kwargs['id'] is None:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        # Default the timestamp for instances that expose one but have not
        # had it set.
        if hasattr(instance, 'timestamp') and instance.timestamp is None:
            instance.timestamp = datetime.utcnow()

        return instance.save(commit=commit)
Beispiel #6
0
class CRUDMixin(object):
    """Mixin that adds create/read/update/delete helpers to a model.

    Copied from https://realpython.com/blog/python/python-web-applications-with-flask-part-ii/
    """  # noqa

    __table_args__ = {'extend_existing': True}

    # The default must be a callable: ``default=uuid64.issue()`` would run
    # once at class-definition time and hand the same id to every row.
    id = db.Column(db.BigInteger,
                   primary_key=True,
                   autoincrement=False,
                   default=lambda: uuid64.issue())

    @classmethod
    def create(cls, commit=True, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save the new instance.

        An ``id`` is issued by ``uuid64.issue()`` unless the caller passes
        one. If the instance has a ``timestamp`` attribute that is ``None``
        it is set to the current UTC time.

        :param commit: forwarded to :meth:`save`
        :return: the result of ``instance.save(commit=commit)``
        """
        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        if hasattr(instance, 'timestamp') and instance.timestamp is None:
            instance.timestamp = datetime.utcnow()

        return instance.save(commit=commit)

    @classmethod
    def get(cls, id):
        """Return ``cls.query.get(id)``."""
        return cls.query.get(id)

    # We will also proxy Flask-SqlAlchemy's get_or_404
    # for symmetry
    @classmethod
    def get_or_404(cls, id):
        """Return ``cls.query.get_or_404(id)``."""
        return cls.query.get_or_404(id)

    @classmethod
    def exists(cls, **kwargs):
        """Return True when at least one row matches the ``kwargs`` filter."""
        row = cls.query.filter_by(**kwargs).first()
        return row is not None

    def update(self, commit=True, **kwargs):
        """Set each keyword argument as an attribute, optionally saving.

        :param commit: when truthy the instance is saved (and committed)
        :return: the result of :meth:`save` when committing, else ``self``
        """
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not
        # exist on Python 3 dictionaries.
        for attr, value in kwargs.items():
            setattr(self, attr, value)
        if commit:
            return self.save()
        return self

    def save(self, commit=True):
        """Add this instance to the session, commit if asked, return self."""
        db.session.add(self)
        if commit:
            db.session.commit()
        return self

    def delete(self, commit=True):
        """Mark this instance for deletion and optionally commit.

        :return: ``commit`` itself when it is falsy, otherwise the result of
            ``db.session.commit()``
        """
        db.session.delete(self)
        return commit and db.session.commit()
Beispiel #7
0
    def create(cls, commit=True, ignore_if_exists=False, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save the new instance.

        :param commit: forwarded to ``instance.save``
        :param ignore_if_exists: when true, an ``IntegrityError`` or
            ``InvalidRequestError`` raised while saving is swallowed: the
            session is rolled back and the row matching the caller's
            ``kwargs`` is looked up with ``cls.find`` instead
        :param kwargs: constructor arguments for ``cls``; an ``id`` is
            issued by ``uuid64.issue()`` when none is supplied
        :return: the result of ``instance.save(commit=commit)``, or the
            result of ``cls.find`` when the save failed and
            ``ignore_if_exists`` is set
        :raises IntegrityError, InvalidRequestError: re-raised when
            ``ignore_if_exists`` is false
        """
        # Keep the caller's own criteria. Once a fresh id has been injected,
        # ``kwargs`` can never match the row that already exists.
        lookup = dict(kwargs)

        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        if hasattr(instance, 'created_at') and instance.created_at is None:
            instance.created_at = datetime.utcnow()

        try:
            return instance.save(commit=commit)
        except (IntegrityError, InvalidRequestError):
            if not ignore_if_exists:
                raise
            db.session.rollback()
            # Without any criteria there is nothing meaningful to look up.
            return cls.find(**lookup) if lookup else None
Beispiel #8
0
    def create(cls, commit=True, ignore_if_exists=False, **kwargs):
        """Create an instance of ``cls`` from ``kwargs`` and save it.

        :param commit: passed through to ``instance.save``
        :param ignore_if_exists: when true, a failed save
            (``IntegrityError`` or ``InvalidRequestError``) rolls the session
            back and returns the row found via ``cls.find`` using the
            caller's ``kwargs``
        :param kwargs: constructor arguments for ``cls``; when no ``id`` is
            given, one is issued by ``uuid64.issue()``
        :return: the result of ``instance.save(commit=commit)``, or of
            ``cls.find`` on the ignored-failure path
        :raises IntegrityError, InvalidRequestError: propagated unchanged
            when ``ignore_if_exists`` is false
        """
        # Snapshot the search criteria before adding a generated id; a
        # brand-new id would make the fallback lookup miss the existing row.
        criteria = dict(kwargs)

        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        if hasattr(instance, 'created_at') and instance.created_at is None:
            instance.created_at = datetime.utcnow()

        try:
            return instance.save(commit=commit)
        except (IntegrityError, InvalidRequestError):
            if not ignore_if_exists:
                raise
            db.session.rollback()
            # An empty filter would return an arbitrary row, so report
            # "not found" instead.
            return cls.find(**criteria) if criteria else None
Beispiel #9
0
def store_sentences(source_lang, target_lang, observed_at, sentences):
    """Print one ``INSERT INTO sentence`` statement per sentence pair.

    Each statement carries a freshly issued id, the observation time, both
    language codes, the SHA-1 hex digest of the UTF-8 encoded source text,
    and the two texts with single quotes doubled.

    :type observed_at: datetime

    :param sentences: list of (source, target) sentences
    """
    template = (
        "INSERT INTO sentence (id, observed_at, source_lang, "
        "target_lang, source_text_hash, source_text, target_text) "
        "VALUES('{}', '{}', '{}', '{}', '{}', '{}', '{}');"
    )

    for source_text, target_text in sentences:
        digest = hashlib.sha1(source_text.encode("utf-8")).hexdigest()
        values = (
            uuid64.issue(),
            observed_at,
            source_lang,
            target_lang,
            digest,
            source_text.replace("'", "''"),
            target_text.replace("'", "''"),
        )
        print(template.format(*values))
Beispiel #10
0
class CRUDMixin(object):
    """Mixin that adds create/read/update/delete helpers to a model.

    Copied from https://realpython.com/blog/python/python-web-applications-with-flask-part-ii/
    """  # noqa

    __table_args__ = {'extend_existing': True}  # type: Any

    # Pass a callable so an id is issued per inserted row;
    # ``default=uuid64.issue()`` would be evaluated once, when the class is
    # defined, and reuse that single value for every row.
    id = db.Column(db.BigInteger,
                   primary_key=True,
                   autoincrement=False,
                   default=lambda: uuid64.issue())

    @classmethod
    def create(cls, commit=True, ignore_if_exists=False, **kwargs):
        """Instantiate ``cls`` from ``kwargs`` and save the new instance.

        An ``id`` is issued by ``uuid64.issue()`` unless the caller passes
        one. If the instance has a ``created_at`` attribute that is ``None``
        it is set to the current UTC time.

        :param commit: forwarded to :meth:`save`
        :param ignore_if_exists: when true, an ``IntegrityError`` or
            ``InvalidRequestError`` raised while saving is swallowed: the
            session is rolled back and the row matching the caller's
            ``kwargs`` is returned via :meth:`find`
        :return: the result of :meth:`save`, or of :meth:`find` when the
            save failed and ``ignore_if_exists`` is set
        :raises IntegrityError, InvalidRequestError: re-raised when
            ``ignore_if_exists`` is false
        """
        # Keep the caller's own criteria. Once a fresh id has been injected,
        # ``kwargs`` can never match the row that already exists.
        lookup = dict(kwargs)

        if 'id' not in kwargs:
            kwargs['id'] = uuid64.issue()
        instance = cls(**kwargs)

        if hasattr(instance, 'created_at') and instance.created_at is None:
            instance.created_at = datetime.utcnow()

        try:
            return instance.save(commit=commit)
        except (IntegrityError, InvalidRequestError):
            if not ignore_if_exists:
                raise
            db.session.rollback()
            # Without any criteria there is nothing meaningful to look up.
            return cls.find(**lookup) if lookup else None

    @classmethod
    def get(cls, id):
        """Return ``cls.query.get(id)``."""
        return cls.query.get(id)

    # We will also proxy Flask-SqlAlchemy's get_or_404
    # for symmetry
    @classmethod
    def get_or_404(cls, id):
        """Return ``cls.query.get_or_404(id)``."""
        return cls.query.get_or_404(id)

    @classmethod
    def find(cls, **kwargs):
        """Return the first row matching the ``kwargs`` filter, or None."""
        return cls.query.filter_by(**kwargs).first()

    @classmethod
    def exists(cls, **kwargs):
        """Return True when :meth:`find` locates a matching row."""
        row = cls.find(**kwargs)
        return row is not None

    def update(self, commit=True, **kwargs):
        """Set each keyword argument as an attribute, optionally saving.

        :param commit: when truthy the instance is saved (and committed)
        :return: the result of :meth:`save` when committing, else ``self``
        """
        # ``items()`` works on both Python 2 and 3; ``iteritems()`` does not
        # exist on Python 3 dictionaries.
        for attr, value in kwargs.items():
            setattr(self, attr, value)
        if commit:
            return self.save()
        return self

    def save(self, commit=True):
        """Add this instance to the session, commit if asked, return self."""
        db.session.add(self)
        if commit:
            db.session.commit()
        return self

    def delete(self, commit=True):
        """Mark this instance for deletion and optionally commit.

        :return: ``commit`` itself when it is falsy, otherwise the result of
            ``db.session.commit()``
        """
        db.session.delete(self)
        return commit and db.session.commit()

    def __iter__(self):
        """Yield ``(column name, str(value))`` for every table column."""
        for column in self.__table__.columns:
            yield column.name, str(getattr(self, column.name))