def imageProcessing(urlid, imgList):
    """Thumbnail the downloaded images of one URL and register the results.

    Every name in ``imgList`` is looked up inside the folder configured under
    ``[files] img``.  Images narrower than 75 px or shorter than 50 px are
    deleted.  Every other image is shrunk in place to fit within 450x300,
    saved under a fresh UUID-based name (keeping the original extension and
    format), and the original file is deleted.  Each new file name is then
    passed to ``Urlprovider.saveUrlIMG`` together with ``urlid``.

    A failure on one image is logged and does not stop the remaining images
    from being processed.

    Args:
        urlid: Identifier forwarded unchanged to ``saveUrlIMG``.
        imgList: Iterable of image file names relative to the image folder.
    """
    config = Config()
    myConfiguration = config.getConfParser()
    out_folder = myConfiguration.get('files', 'img')
    urlproviderObj = Urlprovider()

    # Loop-invariant settings, computed once.
    newSize = (450, 300)
    # Image.ANTIALIAS is the historical alias of LANCZOS and no longer exists
    # in recent Pillow releases; prefer LANCZOS and fall back to the old name.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    saveImgList = []
    for img in imgList:
        imgfile = out_folder + "/" + img
        newImgName = None
        try:
            # The context manager closes the source file before it is
            # deleted below; removing a file that is still open fails on
            # Windows and leaks a file handle elsewhere.
            with Image.open(imgfile) as im:
                imgwidth, imgheight = im.size
                imgformat = im.format
                if imgwidth >= 75 and imgheight >= 50:
                    extension = os.path.splitext(img)[1]
                    newImgName = str(uuid.uuid1()) + str(extension)
                    im.thumbnail(newSize, resample)
                    im.save(out_folder + "/" + newImgName, imgformat)
            # The original is removed in both cases: too small, or replaced
            # by its thumbnail.
            os.remove(imgfile)
            if newImgName is not None:
                saveImgList.append(newImgName)
        except Exception:
            # Best effort per image, but let KeyboardInterrupt/SystemExit
            # propagate instead of swallowing them with a bare except.
            logger.logInfo("in urldatagrapper.py open images : " + str(sys.exc_info()[0]))

    for saveimg in saveImgList:
        urlproviderObj.saveUrlIMG(urlid, saveimg)
def imageProcessing(urlid, imgList):
    """Thumbnail the downloaded images of one URL and register the results.

    Every name in ``imgList`` is looked up inside the folder configured under
    ``[files] img``.  Images narrower than 75 px or shorter than 50 px are
    deleted.  Every other image is shrunk in place to fit within 450x300,
    saved under a fresh UUID-based name (keeping the original extension and
    format), and the original file is deleted.  Each new file name is then
    passed to ``Urlprovider.saveUrlIMG`` together with ``urlid``.

    A failure on one image is logged and does not stop the remaining images
    from being processed.

    Args:
        urlid: Identifier forwarded unchanged to ``saveUrlIMG``.
        imgList: Iterable of image file names relative to the image folder.
    """
    config = Config()
    myConfiguration = config.getConfParser()
    out_folder = myConfiguration.get('files', 'img')
    urlproviderObj = Urlprovider()

    # Loop-invariant settings, computed once.
    newSize = (450, 300)
    # Image.ANTIALIAS is the historical alias of LANCZOS and no longer exists
    # in recent Pillow releases; prefer LANCZOS and fall back to the old name.
    resample = getattr(Image, "LANCZOS", None)
    if resample is None:
        resample = Image.ANTIALIAS

    saveImgList = []
    for img in imgList:
        imgfile = out_folder + "/" + img
        newImgName = None
        try:
            # The context manager closes the source file before it is
            # deleted below; removing a file that is still open fails on
            # Windows and leaks a file handle elsewhere.
            with Image.open(imgfile) as im:
                imgwidth, imgheight = im.size
                imgformat = im.format
                if imgwidth >= 75 and imgheight >= 50:
                    extension = os.path.splitext(img)[1]
                    newImgName = str(uuid.uuid1()) + str(extension)
                    im.thumbnail(newSize, resample)
                    im.save(out_folder + "/" + newImgName, imgformat)
            # The original is removed in both cases: too small, or replaced
            # by its thumbnail.
            os.remove(imgfile)
            if newImgName is not None:
                saveImgList.append(newImgName)
        except Exception:
            # Best effort per image, but let KeyboardInterrupt/SystemExit
            # propagate instead of swallowing them with a bare except.
            logger.logInfo("in urldatagrapper.py open images : " + str(sys.exc_info()[0]))

    for saveimg in saveImgList:
        urlproviderObj.saveUrlIMG(urlid, saveimg)
#encoding:UTF-8 from dal.urlprovider import Urlprovider from helper.applog import AppLog from helper.stringhelper import stringHelper from helper.urlhelper import UrlHelper import sys logger = AppLog() strObj = stringHelper() ulrObj = UrlHelper() urlpObj = Urlprovider() try: posts = urlpObj.getPosts() for post in posts: postid = post['id'] postText = post['text'] postURLs = strObj.extractURL(postText) for url in postURLs: if url: orgurl = ulrObj.getRealURL(url) if orgurl: urlpObj.addURL(url, orgurl, postid) urlpObj.updatePostUrlExtracted(postid, 1) except:
newImgName=str(uuid.uuid1())+str(extension) im.thumbnail(newSize, Image.ANTIALIAS) im.save(out_folder+"/"+newImgName,imgformat) os.remove(imgfile) saveImgList.append(newImgName) except: logger.logInfo("in urldatagrapper.py open images : "+str(sys.exc_info()[0])) for saveimg in saveImgList: urlproviderObj.saveUrlIMG(urlid,saveimg) logger=AppLog() urlproviderObj=Urlprovider() urlHelpObj=UrlHelper() try: urls=urlproviderObj.getURLs() for url in urls: if httpExists(url['orgurl']): urlid=url['id'] soup=getUrlHTMLsoup(url['orgurl']) if soup: headrDict=urlHelpObj.getHeaderInfo(soup) urlproviderObj.addUrlHeaderInfo(urlid,headrDict['title'], headrDict['description']) getUrlImgs(url['orgurl'],urlid,soup) urlproviderObj.updateUrlStatus(urlid,1)
#encoding:UTF-8 from dal.urlprovider import Urlprovider from helper.applog import AppLog from helper.stringhelper import stringHelper from helper.urlhelper import UrlHelper import sys logger=AppLog() strObj=stringHelper() ulrObj=UrlHelper() urlpObj=Urlprovider() try: posts=urlpObj.getPosts() for post in posts: postid=post['id'] postText=post['text'] postURLs=strObj.extractURL(postText) for url in postURLs: if url: orgurl=ulrObj.getRealURL(url) if orgurl: urlpObj.addURL(url, orgurl, postid) urlpObj.updatePostUrlExtracted(postid,1) except:
extension = os.path.splitext(img)[1] newImgName = str(uuid.uuid1()) + str(extension) im.thumbnail(newSize, Image.ANTIALIAS) im.save(out_folder + "/" + newImgName, imgformat) os.remove(imgfile) saveImgList.append(newImgName) except: logger.logInfo("in urldatagrapper.py open images : " + str(sys.exc_info()[0])) for saveimg in saveImgList: urlproviderObj.saveUrlIMG(urlid, saveimg) logger = AppLog() urlproviderObj = Urlprovider() urlHelpObj = UrlHelper() try: urls = urlproviderObj.getURLs() for url in urls: if httpExists(url['orgurl']): urlid = url['id'] soup = getUrlHTMLsoup(url['orgurl']) if soup: headrDict = urlHelpObj.getHeaderInfo(soup) urlproviderObj.addUrlHeaderInfo(urlid, headrDict['title'], headrDict['description']) getUrlImgs(url['orgurl'], urlid, soup)