コード例 #1
0
    def calculate_result(self, url, userPK, **kwargs):
        """Import a Thingiverse page as a new draft ``Project``.

        Fetches ``url``, scrapes the title, tags, description, instructions
        and license out of the HTML, stores them on a newly created draft
        project, and finally builds one ``ThingiFileTask`` signature per
        image/file URL found on the page and hands them to ``chord`` with a
        ``ResaveProjectTask`` signature as its body (presumably Celery's
        chord, so the resave runs after the downloads -- confirm against the
        file's imports).

        Args:
            url: Address of the page to import.
            userPK: Value assigned to ``project.author_id``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The title the project was saved under. A timestamp suffix is
            appended when another project already has the scraped title.

        Raises:
            IndexError: If the page has no title header or no element with
                id ``description``. Both are read before anything is saved,
                so no partial project is left behind in that case.
        """
        import datetime

        print("importing project : {}".format(url))
        response = get_response(url)
        dom = html.fromstring(response.read())

        # Scrape everything mandatory *before* the first save, so a page with
        # an unexpected layout fails without persisting a half-built project.
        title = dom.xpath('//*[contains(@class,\'thing-header-data\')]/h1/text()')[0].strip()
        tags = dom.xpath("//*[contains(@class,\'thing-info-content thing-detail-tags-container\')]/div/a/text()")
        readme_text = self._node_to_ascii_markdown(dom.xpath("//*[@id = 'description']")[0])

        project = Project()
        project.author_id = userPK
        project.title = title
        project.draft = True

        # exists() only asks the database whether a row matches instead of
        # loading every matching project just to test truthiness.
        if Project.objects.filter(title=project.title).exists():
            # Disambiguate a title that is already taken with a timestamp.
            project.title += " -- " + str(datetime.datetime.today())

        # The project is saved before tags and files are attached to it.
        project.save()
        for tag in tags:
            project.tags.add(tag)

        ## get special text files. (readme, instructions, license)
        readmename = "README.md"
        print(readmename)
        print(readme_text)
        project.bodyFile = self._store_text_file(
            project, readme_text, readmename, is_readme=True)
        project.save()
        print("bodyFile:")
        print(project.bodyFile)

        # A page without an instructions element simply gets no
        # Instructions.md. Only the lookup is guarded, so an IndexError
        # raised while storing the file is no longer silently swallowed.
        try:
            instructions_node = dom.xpath("//*[@id = 'instructions']")[0]
        except IndexError:
            pass
        else:
            self._store_text_file(
                project,
                self._node_to_ascii_markdown(instructions_node),
                "Instructions.md")

        ## The license is picked by fixed position among the links inside the
        ## license-text element (third href, second link text). That layout
        ## dependency is fragile, so a page that does not match it skips the
        ## license file instead of aborting the import half-way through.
        try:
            licenseurl = dom.xpath("//*[contains(@class,\'license-text\')]/a/@href")[2].strip()
            licensetext = dom.xpath("//*[contains(@class,\'license-text\')]/a/text()")[1].strip()
        except IndexError:
            print("license links not found in the expected layout; skipping License.md")
        else:
            self._store_text_file(
                project, "[" + licensetext + "](" + licenseurl + ")", "License.md")

        ## get all the projects image and file objects
        filelist = dom.xpath('//*[contains(@class,\'thing-file\')]/a/@href')
        # Gallery thumbs whose data-thingiview-url is empty; per the original
        # author's note these are the raw images.
        imagelist = dom.xpath('//*[contains(@class,\'thing-gallery-thumbs\')]/div[@data-track-action="viewThumb"][@data-thingiview-url=""]/@data-large-url')
        fileurls = [urlparse.urljoin('http://www.thingiverse.com/', fl)
                    for fl in imagelist + filelist]
        print("fileurls:")
        print(fileurls)

        bundle_o_tasks = [ThingiFileTask().si(url=fileurl, projectPK=project.pk)
                          for fileurl in fileurls]
        filetask = chord(bundle_o_tasks)
        filetask(ResaveProjectTask().si(projectPK=project.pk))
        return project.title

    @staticmethod
    def _node_to_ascii_markdown(node):
        """Serialize ``node``, convert its HTML to Markdown, reduce to ASCII text."""
        import unicodedata

        import html2text

        markup = etree.tostring(node)
        # The serializer may hand back bytes; decode them so the converter
        # always receives text rather than an encoded byte string.
        if isinstance(markup, bytes):
            markup = markup.decode("utf-8")
        # A fresh converter per call, so no state is shared between documents.
        markdown = html2text.HTML2Text().handle(markup)
        # NFKD splits accented characters into base letter + combining mark,
        # so dropping the non-ASCII code points keeps the base letter.
        ascii_bytes = unicodedata.normalize('NFKD', markdown).encode('ascii', 'ignore')
        return ascii_bytes.decode('ascii')

    @staticmethod
    def _store_text_file(project, text, name, is_readme=False):
        """Create a ``fileobject`` named ``name`` under ``project``, fill it with ``text``, save and return it."""
        item = fileobject()
        item.parent = project
        if is_readme:
            item.isReadme = True
        item.fromText(text, name)
        item.save()
        return item