Python get_file_handler Exemples, doc_trans.utils.get_file_handler Python Exemples

Exemple #1

0

Afficher le fichier

def handle_page_added(project, added_version, page_path, page_simple_path):
    """Register a newly added page and store its paragraphs.

    Creates the ``Page`` record, records an ``'added'`` ``PageChange`` for
    it, splits the file at ``page_path`` into paragraphs with the page's
    file handler and creates one ``Paragraph`` per piece. Ordinals start
    at 1 and follow the order returned by the file handler. Finishes by
    calling ``page.check()``.

    Args:
        project: project the new page belongs to.
        added_version: version in which the page was added; stored as the
            current version of the page and of every paragraph.
        page_path: path passed to ``open`` to read the page content.
        page_simple_path: path stored on the ``Page`` record.
    """
    page = Page.objects.create(project=project,
                               path=page_simple_path,
                               current_version=added_version)
    print("created page '%s' for path '%s' at version '%s'" % (
        page.slug, page.path, added_version.revision))

    page_change = PageChange.objects.create(version=added_version,
                                            page=page,
                                            action='added')
    print("created page_change '%s' for page '%s'" % (
        page_change.action,
        page_change.page.path,
    ))

    file_handler = get_file_handler(page)
    # Close the file deterministically instead of leaving the handle to the
    # garbage collector.
    with open(page_path) as page_file:
        page_content = page_file.read()

    paragraphs = file_handler.split(page_content, file_name=page_path)
    for ordinal, paragraph_content in enumerate(paragraphs):
        Paragraph.objects.create(page=page,
                                 current_version=added_version,
                                 original=paragraph_content,
                                 ordinal=ordinal + 1)

    page.check()
    # Blank line separating the log output of consecutive pages.
    print("")

Exemple #2

0

Afficher le fichier

Fichier : models.py Projet : ZoomQuiet/openbookplatform

 def check(self):
     """Validate this page's stored paragraphs against its source file.

     First verifies that the paragraphs of the page's current version,
     ordered by ``ordinal``, are numbered 1, 2, 3, ... without gaps. Then
     reads the page file under ``self.project.doc_path`` and hands both the
     file content and the concatenation of the stored paragraphs to the
     file handler's ``compare``.

     Raises:
         ValidationError: if a paragraph's ordinal does not match its
             1-based position.
     """
     file_handler = get_file_handler(self)
     paragraphs_in_db = Paragraph.objects.filter(
         page=self,
         current_version=self.current_version).order_by('ordinal')
     for position, paragraph in enumerate(paragraphs_in_db):
         if position + 1 != paragraph.ordinal:
             ordinals = ",".join(
                 str(o)
                 for o in paragraphs_in_db.values_list('ordinal', flat=True))
             raise ValidationError(
                 u"page '%s' ordinals didn't continuous in db\n\n%s\n\n" % (
                     self.path, ordinals))

     # Close the file deterministically instead of leaking the handle.
     with open(os.path.join(self.project.doc_path, self.path)) as page_file:
         page_content = page_file.read()
     # NOTE(review): what compare() does on a mismatch is defined by the
     # file handler and is not visible here.
     file_handler.compare(page_content, file_handler.concat(paragraphs_in_db), self)

Exemple #3

0

Afficher le fichier

Fichier : models.py Projet : ZoomQuiet/openbookplatform

 def write_page(self):
     """Export this page's current paragraphs to the translation directory.

     The output file is
     ``settings.DOCS_DIR/<project slug>/<settings.TRANSLATION_LANGUAGE>/<self.path>``.
     Its content is the file handler's concatenation of each current
     paragraph's ``original_or_translation`` (ordered by ``ordinal``),
     encoded as UTF-8.
     """
     # Build the complete content BEFORE opening the target: opening with
     # 'w' truncates the file, so a failure while querying, concatenating or
     # encoding would otherwise leave an empty export behind.
     paragraph_objects = self.current_paragraphs.order_by('ordinal')
     translations = [p.original_or_translation for p in paragraph_objects]
     file_handler = get_file_handler(self)
     file_content = file_handler.concat_translations(translations)
     file_content = file_content.encode('utf-8')

     export_dir = os.path.join(settings.DOCS_DIR, self.project.slug, settings.TRANSLATION_LANGUAGE)
     # The context manager closes the file even if the write fails.
     with open(os.path.join(export_dir, self.path), 'w') as page_file:
         page_file.write(file_content)

Exemple #4

0

Afficher le fichier

Fichier : models.py Projet : ZoomQuiet/openbookplatform

 def write_page(self):
     """Write the translated version of this page to the export tree.

     Collects ``original_or_translation`` from every paragraph in
     ``self.current_paragraphs`` (ordered by ``ordinal``), joins them with
     the file handler's ``concat_translations``, encodes the result as
     UTF-8 and writes it to ``self.path`` below
     ``settings.DOCS_DIR/<project slug>/<settings.TRANSLATION_LANGUAGE>``.
     """
     # Produce the bytes first so the target file is only truncated once
     # there is something valid to put into it.
     translations = [
         paragraph.original_or_translation
         for paragraph in self.current_paragraphs.order_by('ordinal')
     ]
     file_content = get_file_handler(self).concat_translations(translations)
     file_content = file_content.encode('utf-8')

     export_dir = os.path.join(settings.DOCS_DIR, self.project.slug,
                               settings.TRANSLATION_LANGUAGE)
     target_path = os.path.join(export_dir, self.path)
     # "with" guarantees the handle is closed even when write() raises.
     with open(target_path, 'w') as page_file:
         page_file.write(file_content)

Exemple #5

0

Afficher le fichier

Fichier : sync_doc.py Projet : ZoomQuiet/openbookplatform

def handle_page_added(project, added_version, page_path, page_simple_path):
    """Create the database records for a page that was added in a version.

    Steps, in order: create the ``Page``, create a ``PageChange`` with
    action ``'added'``, read the file at ``page_path``, split it into
    paragraphs through the page's file handler, create a ``Paragraph`` for
    each piece (ordinals are 1-based), and call ``page.check()``.

    Args:
        project: project that owns the page.
        added_version: version the page and its paragraphs are created at.
        page_path: location of the page file to read.
        page_simple_path: value stored in ``Page.path``.
    """
    page = Page.objects.create(project=project, path=page_simple_path, current_version=added_version)
    print("created page '%s' for path '%s' at version '%s'" % (page.slug, page.path, added_version.revision))

    page_change = PageChange.objects.create(version=added_version, page=page, action='added')
    print("created page_change '%s' for page '%s'" % (page_change.action, page_change.page.path, ))

    file_handler = get_file_handler(page)
    # The original left the file handle open until garbage collection.
    with open(page_path) as page_file:
        page_content = page_file.read()

    paragraphs = file_handler.split(page_content, file_name=page_path)
    for index, paragraph_content in enumerate(paragraphs):
        Paragraph.objects.create(page=page, current_version=added_version,
                                 original=paragraph_content, ordinal=index + 1)

    page.check()
    # Emits an empty line, as the bare print statement did.
    print("")

Exemple #6

0

Afficher le fichier

Fichier : models.py Projet : ZoomQuiet/openbookplatform

    def check(self):
        """Check that the paragraphs stored for this page match its file.

        The paragraphs of ``self.current_version`` are loaded ordered by
        ``ordinal`` and must be numbered consecutively starting at 1. The
        page file (``self.path`` under ``self.project.doc_path``) is then
        read and passed, together with the file handler's concatenation of
        the stored paragraphs, to the file handler's ``compare``.

        Raises:
            ValidationError: if the stored ordinals are not consecutive
                from 1.
        """
        file_handler = get_file_handler(self)
        paragraphs_in_db = Paragraph.objects.filter(
            page=self,
            current_version=self.current_version).order_by('ordinal')
        for i, paragraph in enumerate(paragraphs_in_db):
            if i + 1 == paragraph.ordinal:
                continue
            all_ordinals = paragraphs_in_db.values_list('ordinal', flat=True)
            raise ValidationError(
                u"page '%s' ordinals didn't continuous in db\n\n%s\n\n" % (
                    self.path, ",".join(str(o) for o in all_ordinals)))

        page_file_path = os.path.join(self.project.doc_path, self.path)
        # Read inside a context manager so the handle is always released.
        with open(page_file_path) as page_file:
            page_content = page_file.read()
        # NOTE(review): the outcome of a mismatch is decided inside
        # file_handler.compare(), which is not visible here.
        file_handler.compare(page_content,
                             file_handler.concat(paragraphs_in_db), self)

Exemple #7

0

Afficher le fichier

def _stripped_line_set(text):
    """Return the set of whitespace-stripped lines contained in ``text``."""
    return set(line.strip() for line in text.splitlines())


def _count_unhandled(paragraphs_list):
    """Count the entries whose 'handled' flag (index 3) is still false."""
    return len([p for p in paragraphs_list if not p[3]])


def handle_page_modified(project, before_version, modified_version, page_path,
                         page_simple_path):
    """Bring the stored paragraphs of a modified page up to a new version.

    The page is moved to ``modified_version`` and a ``'modified'``
    ``PageChange`` is recorded. The file at ``page_path`` is split into
    paragraphs, and each one is resolved by the first pass that applies:

    1. An identical paragraph exists at ``before_version``: that record is
       moved to ``modified_version`` with its new ordinal.
    2. A paragraph at ``before_version`` shares more than
       ``min_similarity`` of the new paragraph's stripped lines: a new
       ``Paragraph`` is created with it as ``history_paragraph``, and its
       latest translation (if any) is copied over.
    3. Otherwise a fresh ``Paragraph`` is created.

    Paragraphs created in passes 2 and 3 are attached to the page change.

    Args:
        project: project the page belongs to.
        before_version: version the page is currently stored at.
        modified_version: version the page is being updated to.
        page_path: location of the page file to read.
        page_simple_path: value of ``Page.path`` used to find the page.

    Raises:
        ValidationError: if a paragraph is left unhandled, or the number of
            paragraphs in the file differs from the number stored for
            ``modified_version``.
    """
    page = Page.objects.get(project=project,
                            path=page_simple_path,
                            current_version=before_version)
    print("found page '%s' for path '%s' at before_version '%s'" % (
        page.slug, page.path, page.current_version.revision))
    page.current_version = modified_version
    page.save()

    page_change = PageChange.objects.create(version=modified_version,
                                            page=page,
                                            action='modified')
    print("created page_change '%s' for page '%s'" % (
        page_change.action,
        page_change.page.path,
    ))

    file_handler = get_file_handler(page)
    # Close the file deterministically instead of leaking the handle.
    with open(page_path) as page_file:
        page_content = page_file.read()
    paragraphs = file_handler.split(page_content, file_name=page_path)

    # Entry layout: [content, 1-based ordinal, identical old paragraph or
    # None, handled flag].
    paragraphs_list = []
    matched_paragraphs = []
    for index, paragraph_content in enumerate(paragraphs):
        # How many earlier paragraphs in the file have the same text; the
        # same index is used into the identical stored paragraphs so that
        # repeated paragraphs are paired one-to-one.
        duplicate_rank = paragraphs[:index].count(paragraph_content)
        try:
            history_paragraph = Paragraph.objects.filter(
                page=page,
                current_version=before_version,
                original=paragraph_content)[duplicate_rank]
        except IndexError:
            history_paragraph = None
        else:
            if history_paragraph in matched_paragraphs:
                # Already claimed by an earlier paragraph of the file.
                history_paragraph = None
            else:
                matched_paragraphs.append(history_paragraph)
        paragraphs_list.append(
            [paragraph_content, index + 1, history_paragraph, False])

    # Pass 1: unchanged paragraphs keep their record, moved to the new
    # version and ordinal.
    print('len(paragraphs_list) %s' % _count_unhandled(paragraphs_list))
    for entry in paragraphs_list:
        history_paragraph = entry[2]
        if history_paragraph and not entry[3]:
            print('updating history_paragraph %s' % (history_paragraph.id))
            history_paragraph.ordinal = entry[1]
            history_paragraph.current_version = modified_version
            history_paragraph.save()
            entry[3] = True

    # Pass 2: look for a sufficiently similar old paragraph.
    print('len(paragraphs_list) %s' % _count_unhandled(paragraphs_list))
    for entry in paragraphs_list:
        if entry[3]:
            continue
        paragraph_content_set = _stripped_line_set(entry[0].decode('utf-8'))
        if not paragraph_content_set:
            # A paragraph without any line has no measurable similarity;
            # skipping avoids a ZeroDivisionError and leaves it to pass 3.
            continue
        # NOTE(review): modified_paragraph=None presumably selects old
        # paragraphs not yet linked to a successor; confirm on the model.
        for p_in_db in Paragraph.objects.filter(
                page=page,
                current_version=before_version,
                modified_paragraph=None):
            shared_lines = paragraph_content_set & _stripped_line_set(
                p_in_db.original)
            similarity = 1.0 * len(shared_lines) / len(paragraph_content_set)
            if similarity > min_similarity:
                paragraph = Paragraph.objects.create(
                    page=page,
                    current_version=modified_version,
                    original=entry[0],
                    ordinal=entry[1],
                    history_paragraph=p_in_db)

                # Carry the latest translation over to the new paragraph.
                history_translation = p_in_db.latest_translation
                if history_translation:
                    Translation.objects.create(
                        paragraph=paragraph,
                        history_translation=history_translation,
                        translator=history_translation.translator,
                        content=history_translation.content,
                        ip=history_translation.ip,
                    )

                print('similarity %s' % similarity)
                print('paragraph %s(ord:%s) found history_paragraph %s(ord:%s)' % (
                    paragraph.id, paragraph.ordinal,
                    paragraph.history_paragraph.id,
                    paragraph.history_paragraph.ordinal))
                page_change.paragraphs.add(paragraph)
                entry[3] = True
                break

    # Pass 3: everything still unhandled is a brand-new paragraph.
    print('len(paragraphs_list) %s' % _count_unhandled(paragraphs_list))
    for entry in paragraphs_list:
        if not entry[3]:
            paragraph = Paragraph.objects.create(
                page=page,
                current_version=modified_version,
                original=entry[0],
                ordinal=entry[1])
            page_change.paragraphs.add(paragraph)
            entry[3] = True

    unhandled_length = _count_unhandled(paragraphs_list)
    print('len(paragraphs_list) %s' % unhandled_length)
    if unhandled_length != 0:
        raise ValidationError(u"paragraphs %s didn't handled " % (u",".join(
            [str(p[1]) for p in paragraphs_list if not p[3]]), ))

    paragraphs_count_in_db = Paragraph.objects.filter(
        page=page, current_version=modified_version).count()
    if len(paragraphs) != paragraphs_count_in_db:
        raise ValidationError(
            u"paragraphs count in file '%s' diff from count in db %s " % (
                len(paragraphs), paragraphs_count_in_db))

    page.check()

    # Blank line separating the log output of consecutive pages.
    print("")

Exemple #8

0

Afficher le fichier

Fichier : sync_doc.py Projet : ZoomQuiet/openbookplatform

def handle_page_modified(project, before_version, modified_version, page_path, page_simple_path):
    """Synchronise the stored paragraphs of a page with its modified file.

    Looks up the page at ``before_version``, moves it to
    ``modified_version`` and records a ``'modified'`` ``PageChange``. The
    file at ``page_path`` is split into paragraphs which are then resolved
    in three passes:

    * exact match: an identical paragraph stored at ``before_version`` is
      re-used (its ordinal and version are updated);
    * similar match: a stored paragraph whose stripped lines overlap the
      new paragraph's lines by more than ``min_similarity`` becomes the
      ``history_paragraph`` of a newly created paragraph, and its latest
      translation, if present, is copied;
    * no match: a new paragraph is created.

    Newly created paragraphs are added to ``page_change.paragraphs``.

    Args:
        project: project the page belongs to.
        before_version: version the page is stored at before the update.
        modified_version: version to move the page and paragraphs to.
        page_path: location of the page file to read.
        page_simple_path: ``Page.path`` value identifying the page.

    Raises:
        ValidationError: if any paragraph remains unhandled, or the file's
            paragraph count differs from the count stored for
            ``modified_version``.
    """
    def count_unhandled():
        # Index 3 of each entry is the "handled" flag.
        return len([p for p in paragraphs_list if not p[3]])

    page = Page.objects.get(project=project, path=page_simple_path, current_version=before_version)
    print("found page '%s' for path '%s' at before_version '%s'" % (page.slug, page.path, page.current_version.revision))
    page.current_version = modified_version
    page.save()

    page_change = PageChange.objects.create(version=modified_version, page=page, action='modified')
    print("created page_change '%s' for page '%s'" % (page_change.action, page_change.page.path, ))

    file_handler = get_file_handler(page)
    # The context manager releases the handle that open().read() leaked.
    with open(page_path) as page_file:
        page_content = page_file.read()
    paragraphs = file_handler.split(page_content, file_name=page_path)

    # Entries are [paragraph_content, ordinal (1-based), history_paragraph, handled].
    paragraphs_list = []
    matched_paragraphs = []
    for ordinal, paragraph_content in enumerate(paragraphs):
        # N = number of identical paragraphs earlier in the file, used to
        # select the N-th identical stored paragraph.
        N = paragraphs[:ordinal].count(paragraph_content)
        try:
            history_paragraph = Paragraph.objects.filter(page=page, current_version=before_version, original=paragraph_content)[N]
        except IndexError:
            history_paragraph = None
        else:
            if history_paragraph not in matched_paragraphs:
                matched_paragraphs.append(history_paragraph)
            else:
                # This stored paragraph was already paired with an earlier one.
                history_paragraph = None
        paragraphs_list.append([paragraph_content, ordinal + 1, history_paragraph, False])

    # Pass 1: exact matches are moved to the new version in place.
    print('len(paragraphs_list) %s' % count_unhandled())
    for p in paragraphs_list:
        if p[2] and not p[3]:
            print('updating history_paragraph %s' % (p[2].id))
            p[2].ordinal = p[1]
            p[2].current_version = modified_version
            p[2].save()
            p[3] = True

    # Pass 2: similar paragraphs become the history of a new paragraph.
    print('len(paragraphs_list) %s' % count_unhandled())
    for p in paragraphs_list:
        if p[3]:
            continue
        paragraph_content_set = set(line.strip() for line in p[0].decode('utf-8').splitlines())
        if not paragraph_content_set:
            # Without any line the similarity ratio is undefined (it used
            # to raise ZeroDivisionError); let pass 3 create the paragraph.
            continue
        # NOTE(review): modified_paragraph=None presumably restricts the
        # search to old paragraphs without a successor; confirm on the model.
        for p_in_db in Paragraph.objects.filter(page=page, current_version=before_version, modified_paragraph=None):
            p_in_db_content_set = set(line.strip() for line in p_in_db.original.splitlines())
            similarity = 1.0 * len(paragraph_content_set & p_in_db_content_set) / len(paragraph_content_set)
            if similarity > min_similarity:
                paragraph = Paragraph.objects.create(page=page, current_version=modified_version,
                                                     original=p[0], ordinal=p[1], history_paragraph=p_in_db)

                # Copy the latest translation of the old paragraph, if any.
                history_translation = p_in_db.latest_translation
                if history_translation:
                    Translation.objects.create(paragraph=paragraph, history_translation=history_translation,
                                               translator=history_translation.translator,
                                               content=history_translation.content,
                                               ip=history_translation.ip,
                                               )

                print('similarity %s' % similarity)
                print('paragraph %s(ord:%s) found history_paragraph %s(ord:%s)' % (paragraph.id, paragraph.ordinal, paragraph.history_paragraph.id, paragraph.history_paragraph.ordinal))
                page_change.paragraphs.add(paragraph)
                p[3] = True
                break

    # Pass 3: whatever is left is stored as a new paragraph.
    print('len(paragraphs_list) %s' % count_unhandled())
    for p in paragraphs_list:
        if not p[3]:
            paragraph = Paragraph.objects.create(page=page, current_version=modified_version,
                                                 original=p[0], ordinal=p[1])
            page_change.paragraphs.add(paragraph)
            p[3] = True

    unhandled_length = count_unhandled()
    print('len(paragraphs_list) %s' % unhandled_length)
    if unhandled_length != 0:
        raise ValidationError(u"paragraphs %s didn't handled " % (u",".join([str(p[1]) for p in paragraphs_list if not p[3]]), ))

    paragraphs_count_in_db = Paragraph.objects.filter(page=page, current_version=modified_version).count()
    if len(paragraphs) != paragraphs_count_in_db:
        raise ValidationError(u"paragraphs count in file '%s' diff from count in db %s " % (len(paragraphs), paragraphs_count_in_db))

    page.check()

    # Emits an empty line, as the bare print statement did.
    print("")