Ejemplo n.º 1
0
    def update_from_unit(self, translation, unit, pos, template=None):
        """
        Store or refresh the database row matching a translation toolkit unit.

        The row is identified by ``translation`` plus a checksum computed from
        the source text and context. When ``template`` is given, the template's
        target and context are used for the checksum instead of the unit's
        source and context.

        Returns a ``(dbunit, force)`` tuple; ``force`` is True when a new
        ``Unit`` object had to be built instead of reusing a stored one.
        """
        # Pick where the identifying text comes from.
        if template is not None:
            src = get_target(template)
            ctx = get_context(template)
        else:
            src = get_source(unit)
            ctx = get_context(unit)
        checksum = msg_checksum(src, ctx)

        # Imported inside the method, as in the original code.
        from weblate.trans.models import Unit

        lookup = {'translation': translation, 'checksum': checksum}
        try:
            dbunit = self.get(**lookup)
        except Unit.MultipleObjectsReturned:
            # Some inconsistency (possibly race condition): drop every
            # duplicate and fall through to building a fresh unit.
            self.filter(**lookup).delete()
            dbunit = None
        except Unit.DoesNotExist:
            dbunit = None

        # A missing row means we build a new object and force the update.
        force = dbunit is None
        if force:
            dbunit = Unit(
                translation=translation,
                checksum=checksum,
                source=src,
                context=ctx,
            )

        # Push all details from the toolkit unit into the database object.
        dbunit.update_from_unit(unit, pos, force, template)

        return dbunit, force
Ejemplo n.º 2
0
def get_oracle(url):
  """Return the parsed HTML document for *url* with absolute links.

  The serialized document is also handed to ``util.save_file`` under the
  name 'oracle.html', and ``util.screenshot`` is called with that name and
  'oracle.png' (presumably rendering the saved page to an image; confirm
  against ``util``).
  """
  markup = util.get_source(url)
  document = lxml.html.document_fromstring(markup)
  # Rewrite relative links against the page URL (honouring any <base href>).
  document.make_links_absolute(url, resolve_base_href=True)

  serialized = lxml.html.tostring(document)
  util.save_file(serialized, 'oracle.html')
  util.screenshot('oracle.html', 'oracle.png')
  return document
Ejemplo n.º 3
0
Archivo: gen.py Proyecto: jineli/websee
def get_oracle(url):
  """Return the parsed HTML document for *url*, tagged with its doctype.

  The markup is parsed twice: once with ``lxml.etree`` to read the doctype
  from the document info, and once with ``lxml.html`` to build the returned
  document, whose links are made absolute.
  """
  markup = util.get_source(url)

  # First pass: only used to read the doctype.
  html_parser = lxml.etree.HTMLParser()
  parsed_tree = lxml.etree.parse(StringIO(markup), html_parser)

  # Second pass: the document that is actually returned.
  document = lxml.html.document_fromstring(markup)
  document.make_links_absolute(url, resolve_base_href=True)
  document.doctype = parsed_tree.docinfo.doctype
  return document
Ejemplo n.º 4
0
def get_oracle(url):
    """Fetch *url* and return its ``lxml.html`` document.

    Links in the document are made absolute relative to *url*, and the
    doctype read from a separate ``lxml.etree`` parse of the same markup is
    attached to the returned object as ``doctype``.
    """
    page_source = util.get_source(url)

    # Parse with lxml.etree so the doctype is available via docinfo.
    doc_tree = lxml.etree.parse(StringIO(page_source), lxml.etree.HTMLParser())

    page = lxml.html.document_fromstring(page_source)
    page.make_links_absolute(url, resolve_base_href=True)
    page.doctype = doc_tree.docinfo.doctype
    return page
Ejemplo n.º 5
0
def main():
    """Scrape one hard-coded article, print it, and play an audio version.

    Prints the article headline and its generated content, saves the audio
    returned by ``get_audio`` to 'test.mp3', and starts asynchronous
    playback of that file through ``winsound``.
    """
    target_url = "https://nobaproject.com/textbooks/new-textbook-4782ff3c-3de1-4700-a262-9cc372550395/modules/sensation-and-perception"

    # Work out which source the page belongs to and whether a dedicated
    # scraper exists for it, then start the browser driver.
    origin = util.get_source(target_url)
    site_scraper = util.get_scraper(origin)
    browser = util.init_driver(chrome_path)

    # Build the article object from the scraped content and show it.
    parsed_article = fill_article_class(site_scraper, target_url, browser)
    print(parsed_article.headline)
    print(generate_content(parsed_article))

    # Produce the English audio version and write it to disk.
    speech = get_audio(parsed_article, 'en')
    speech.save('test.mp3')

    # NOTE(review): winsound.PlaySound is documented for WAV data, and
    # SND_ASYNC returns immediately -- confirm the MP3 actually plays before
    # the process exits.
    playback_flags = winsound.SND_FILENAME | winsound.SND_ASYNC
    winsound.PlaySound('test.mp3', playback_flags)
Ejemplo n.º 6
0
    def update_from_unit(self, translation, unit, pos, template=None):
        """
        Process a translation toolkit unit and store/update its database entry.

        The entry is looked up by ``translation`` and a checksum of the source
        text and context; with a ``template`` the template's target and
        context feed the checksum instead of the unit's own source and context.

        Returns ``(dbunit, force)`` where ``force`` is True if no reusable
        database entry was found and a new ``Unit`` was constructed.
        """
        origin = unit if template is None else template
        if template is None:
            src = get_source(origin)
        else:
            src = get_target(origin)
        ctx = get_context(origin)
        checksum = msg_checksum(src, ctx)

        # Imported inside the method, as in the original code.
        from weblate.trans.models import Unit

        existing = None
        try:
            existing = self.get(translation=translation, checksum=checksum)
        except Unit.DoesNotExist:
            # Nothing stored yet; a new unit is created below.
            pass
        except Unit.MultipleObjectsReturned:
            # Some inconsistency (possibly race condition), try to recover
            # by removing all matching rows and starting over.
            self.filter(translation=translation, checksum=checksum).delete()

        if existing is None:
            force = True
            dbunit = Unit(
                translation=translation,
                checksum=checksum,
                source=src,
                context=ctx,
            )
        else:
            force = False
            dbunit = existing

        # Update all details from the toolkit unit.
        dbunit.update_from_unit(unit, pos, force, template)

        return dbunit, force