Example #1
0
def read_image_from_pdf(Path, FileName):
    """Extract embedded JPEGs from a PDF and store each one as an ImagesSimple.

    The PDF is scanned as raw bytes. For every ``stream`` keyword that is
    followed within 20 bytes by a JPEG start marker, the bytes up to and
    including the JPEG end marker are written to ``<Path>/jpg<N>.jpg``
    (N counts from 0). Each written file is then saved into a new
    ``ImagesSimple`` object whose ``order`` counts from 1.

    Args:
        Path: Directory in which the extracted ``jpg<N>.jpg`` files are
            written.
        FileName: Path of the PDF file to scan.

    Returns:
        A list of the saved ``ImagesSimple`` objects, in extraction order.
        The list is empty when the PDF contains no JPEG stream.

    Raises:
        Exception: If a JPEG start marker is found but no ``endstream``
            keyword or no JPEG end marker follows it.
    """
    # Bytes literals keep the scan working on raw file content under both
    # Python 2 and Python 3.
    start_mark = b"\xff\xd8"
    end_mark = b"\xff\xd9"
    end_fix = 2  # len(end_mark): makes the slice include the end marker

    # Read the whole PDF and release the handle right away.
    with open(FileName, "rb") as pdf_file:
        pdf = pdf_file.read()

    image_paths = []
    pos = 0
    while True:
        stream_pos = pdf.find(b"stream", pos)
        if stream_pos < 0:
            break

        # Only streams that begin (within 20 bytes) with the JPEG start
        # marker are treated as images; everything else is skipped.
        jpg_start = pdf.find(start_mark, stream_pos, stream_pos + 20)
        if jpg_start < 0:
            pos = stream_pos + 20
            continue

        endstream_pos = pdf.find(b"endstream", jpg_start)
        if endstream_pos < 0:
            raise Exception("Didn't find end of stream!")

        # NOTE(review): the end marker is searched from 20 bytes before
        # "endstream" with no upper bound, so a marker located after this
        # stream would also be accepted.
        jpg_end = pdf.find(end_mark, endstream_pos - 20)
        if jpg_end < 0:
            raise Exception("Didn't find end of JPG!")
        jpg_end += end_fix

        index = len(image_paths)
        print("JPG %d from %d to %d" % (index, jpg_start, jpg_end))

        new_file_name = Path + "/jpg%d.jpg" % index
        with open(new_file_name, "wb") as jpg_file:
            jpg_file.write(pdf[jpg_start:jpg_end])
        image_paths.append(new_file_name)

        # Resume scanning right after the extracted image.
        pos = jpg_end

    image_objects = []
    for ordering, image_path in enumerate(image_paths, 1):
        # Binary mode: JPEG data must not pass through text-mode handling.
        # The handle is closed as soon as the content has been saved.
        with open(image_path, "rb") as image_file:
            new_object = ImagesSimple(order=ordering)
            print(str(image_file))
            name = "%i.jpg" % ordering
            # File is presumably Django's django.core.files.File wrapper.
            new_object.path.save(name, File(image_file))
            new_object.save()
        image_objects.append(new_object)

    return image_objects
Example #2
0
def read_catalog(Start, Catalog):
    """Rebuild the catalog positions of ``Catalog.brand`` from a directory tree.

    All existing ``ProductionElecticSimple`` rows of the brand are deleted
    first. Every entry of ``Start`` is then listed as a directory, and its
    files are dispatched on their guessed MIME type:

    * PDF: embedded images are extracted with ``read_image_from_pdf``.
    * plain text: read as cp1251 and turned into an unsaved
      ``ProductionElecticSimple`` titled after the directory name, with
      each line wrapped in ``<p>...</p>``.
    * JPEG: saved as an ``ImagesSimple`` with ``order=0`` and used as the
      title image.

    After each directory has been scanned, ``save_catalog_position`` is
    called once with the collected objects.

    Args:
        Start: Root directory; each of its entries is listed as one
            catalog position directory.
        Catalog: Object whose ``brand`` attribute selects the rows to
            replace and is assigned to the new ones.
    """
    # Start from a clean slate for this brand.
    ProductionElecticSimple.objects.filter(brand=Catalog.brand).delete()

    for ordering, path in enumerate(os.listdir(Start), 1):
        print(path + ":")
        position_dir = Start + "/" + path
        images_from_pdf = None
        title_image = None
        position = None

        for PreItem in os.listdir(position_dir):
            item = position_dir + "/" + PreItem
            # Guess once per entry; the tuple includes the encoding, so
            # compressed files (encoding != None) never match.
            mime = mimetypes.guess_type(item)

            if mime == ('application/pdf', None):
                print("pdf  - " + item)
                images_from_pdf = read_image_from_pdf(position_dir, item)

            elif mime == ('text/plain', None):
                with open(item) as text_file:
                    # Description files are cp1251; re-encode as UTF-8
                    # (Python 2 byte-string handling).
                    lines = [line.decode('cp1251').encode('utf8')
                             for line in text_file]
                description = "".join("<p>" + line + "</p>" for line in lines)
                meta_description = "".join(lines)
                meta_keys = ",".join(meta_description.split())
                position = ProductionElecticSimple(
                    title=path,
                    text=description,
                    meta_description=meta_description,
                    ordering=ordering,
                    brand=Catalog.brand,
                    meta_keyword=meta_keys
                )

            elif mime == ('image/jpeg', None):
                # Binary mode for image data; the handle is closed once the
                # content has been saved.
                with open(item, "rb") as image_file:
                    title_image = ImagesSimple(order=0)
                    title_image.path.save(PreItem, File(image_file))
                    title_image.save()

        # NOTE(review): PreItem is the name of the last entry listed in the
        # directory. For an empty directory it is unbound (first iteration)
        # or left over from the previous directory - confirm what
        # save_catalog_position expects as its first argument.
        save_catalog_position(PreItem, position, title_image, images_from_pdf)