def getProperty(prop_id):
    """Return the JSON form of the property record identified by ``prop_id``.

    The first row returned by ``PropertyDAO().getRecord(prop_id)`` is wrapped
    in a ``Property`` built from its first four columns.

    Args:
        prop_id: Identifier passed straight through to the DAO.

    Returns:
        Whatever ``Property.getJson()`` returns, or the string
        ``'No record found!'`` when the lookup or the conversion fails.
    """
    try:
        record = PropertyDAO().getRecord(prop_id)[0]
        found = Property(record[0], record[1], record[2], record[3])
        return found.getJson()
    except Exception:
        # Best-effort lookup: every ordinary failure (no rows, short row, DAO
        # error) maps to the fallback message.  ``Exception`` rather than a
        # bare ``except`` so KeyboardInterrupt/SystemExit still propagate.
        return 'No record found!'
def populateTokens(self, property: Property, sentences):
    """Record every sentence that exactly equals a known phrase as a token.

    Args:
        property: Object receiving the matches through ``setToken``.
        sentences: Iterable of candidate strings, or ``None`` to do nothing.

    Returns:
        The same ``property`` object that was passed in.
    """
    if sentences is None:
        return property

    known_phrases = (
        'professionally managed',
        'no pets',
        'parking stall',
        'available now',
        'building amenities',
        'near school',
        'brand new',
        'suite laundry',
    )
    for candidate in sentences:
        # Exact match only; a sentence that merely contains a phrase is ignored.
        if candidate in known_phrases:
            property.setToken(candidate)
    return property
def update_package_by_id(package_id):
    """Refresh one package if it is stale, then redirect to the index view.

    A package qualifies when it matches ``package_id`` and either has no
    ``last_updated`` value or was last updated at least two hours ago.  When
    one is found, ``update_package`` is run to completion on a fresh event
    loop and the ``last_updated`` property is merged and committed.

    Failures are logged and do not propagate.  The session is rolled back on
    failure so a broken transaction is not left on ``db_session``.

    Args:
        package_id: Value compared against ``Package.pid``.

    Returns:
        The result of ``redirect(url_for("index"))``.
    """
    stale_before = datetime.utcnow() - timedelta(hours=2)
    package = db_session.query(Package) \
        .filter(Package.pid == package_id,
                or_(Package.last_updated.is_(None),
                    Package.last_updated <= stale_before)) \
        .options(load_only(Package.owner, Package.repo, Package.path,
                           Package.ptype, Package.date)) \
        .first()
    if package:
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            # run_until_complete wraps the coroutine in a task on this loop
            # itself, so no separate ensure_future call is needed.
            loop.run_until_complete(update_package(package))
            last_updated_prop = Property("last_updated",
                                         date_val=datetime.utcnow())
            db_session.merge(last_updated_prop)
            db_session.commit()
        except Exception as ex:
            LOGGER.error(ex)
            LOGGER.debug(traceback.format_exc())
            # Discard the failed transaction so later use of the session
            # does not hit a "transaction has been rolled back" error.
            db_session.rollback()
        finally:
            loop.close()
    return redirect(url_for("index"))
def __getitem__(self, key):
    """Resolve ``key`` against ``self.properties`` and wrap the outcome.

    ``key`` may be a plain name or a dotted path such as ``"a.b.c"``.  The
    first segment is looked up in ``self.properties``; the value is treated
    as a list (a non-list value is wrapped in one).  Each further segment is
    then looked up in every truthy element via ``in`` / ``.get``, and the
    collected results become the value list for the next segment.

    If an element lacks a segment, an ``Error`` of type
    ``Error.MISSING_DYNAMIC_VALUE`` is recorded and scanning of that segment
    stops.  If a segment produces no values at all, resolution stops and the
    value is ``None``.

    Args:
        key: Property name or dotted path; ``None`` yields an empty result.

    Returns:
        ``Property(key, value, error)`` where ``value`` is the resolved list
        (or ``None``) and ``error`` is the last recorded ``Error`` (or
        ``None``).
    """
    retValue = None
    error = None
    if key is not None:
        dynamickey = key
        attributes = []
        if '.' in key:
            dynamickey, *attributes = key.split('.')
        if dynamickey in self.properties:
            values = self.properties[dynamickey]
            if not isinstance(values, list):
                values = [values]
            for attribute in attributes:
                newvalues = None
                for value in values:
                    if value:
                        if attribute in value:
                            if newvalues is None:
                                newvalues = []
                            newvalues.append(value.get(attribute))
                        else:
                            error = Error(
                                type=Error.MISSING_DYNAMIC_VALUE,
                                message="No key: '" + str(attribute) +
                                "' in object: " + dynamickey +
                                " for dynamic key: '" + str(key))
                            break
                values = newvalues
                if values is None:
                    # Nothing left to descend into; iterating None for the
                    # next segment would raise TypeError.
                    break
            retValue = values
    return Property(key, retValue, error)
def add_mirror():
    """Register the URL given in the ``url`` query argument as a mirror.

    Steps, in order:
      1. ``check_auth()``; a truthy result is returned as the response.
      2. The ``url`` argument must be non-empty (400 otherwise).
      3. A ``HEAD`` request to the URL must answer with status 200 (400
         otherwise, including on any request error or timeout).
      4. If a ``MIRROR_*`` property already stores this URL, report that and
         change nothing.
      5. Otherwise store it under ``MIRROR_<n>``, where ``n`` is one more
         than the highest existing mirror number (0 when there are none).

    Returns:
        A plain-text ``Response`` describing the outcome, or the truthy
        result of ``check_auth()``.
    """
    auth_check = check_auth()
    if auth_check:
        return auth_check

    url = request.args.get("url")
    if not url:
        return Response("url is empty, try '{}?url=http://url/'".format(
            request.url_root + request.path.lstrip("/")),
            400, mimetype="text/plain")

    try:
        # Without a timeout an unresponsive host would block this request
        # handler indefinitely; a timeout lands in the except branch below.
        resp = requests.head(url, timeout=10)
        if resp.status_code != 200:
            return Response("'{}' not available. Status code was {}\n".format(
                url, resp.status_code), 400, mimetype="text/plain")
    except Exception as ex:
        return Response("Error occured while checking url: {}".format(ex),
                        400, mimetype="text/plain")

    mirrors = db_session.query(Property) \
        .filter(Property.identifier.like("MIRROR_%")) \
        .options(load_only(Property.identifier, Property.text_val)) \
        .all()

    if any(mirror.text_val == url for mirror in mirrors):
        return Response("'{}' is already a mirror.\n".format(url),
                        200, mimetype="text/plain")

    if mirrors:
        # Identifiers look like "MIRROR_<number>"; continue after the highest.
        new_mirror_nr = max(
            int(mirror.identifier[len("MIRROR_"):]) for mirror in mirrors) + 1
    else:
        new_mirror_nr = 0

    new_mirror = Property("MIRROR_" + str(new_mirror_nr), text_val=url)
    db_session.add(new_mirror)
    db_session.commit()
    return Response("'{}' added as mirror.\n".format(url),
                    200, mimetype="text/plain")
def populateRoomSize(self, property: "Property"):
    """Fill rooms and size from the ``-``-separated ``characteristics`` text.

    Each piece of ``property.characteristics`` is inspected: a piece
    containing ``'br'`` sets the room count, otherwise a piece containing
    ``'ft_sq'`` sets the size.  The marker is removed and the remainder is
    stripped; the value is passed on as a string.  ``setUpdate()`` is called
    after every value that is set.

    A missing or empty ``characteristics`` leaves the property untouched
    instead of raising ``AttributeError``.

    Args:
        property: Object exposing ``characteristics``, ``setRooms``,
            ``setSize`` and ``setUpdate``.

    Returns:
        The same ``property`` object that was passed in.
    """
    characteristics = property.characteristics
    if not characteristics:
        return property

    for piece in characteristics.split('-'):
        if 'br' in piece:
            property.setRooms(piece.replace('br', '').strip())
            property.setUpdate()
        elif 'ft_sq' in piece:
            property.setSize(piece.replace('ft_sq', '').strip())
            property.setUpdate()
    return property
from db.postgresl import PropertyDAO
from model.Property import Property
from service.TextMiningService import TextMiningService

# Exploratory script: load up to 500 records, preview the first five and
# print the sentence tokenisation of all descriptions joined together.
propertyDao = PropertyDAO()
textMiningService = TextMiningService()

rows = propertyDao.getRecords(500)
# Alternative kept for debugging a single record:
#rows = propertyDao.getRecord(6829944535);
print("Records: ", len(rows))

# Column 0 is used as the id, column 1 (UTF-8 encoded) as the description.
records = [Property(row[0], row[1].encode("utf-8")) for row in rows]

print("\nShow me the first 5 records: ", "\n")
description_lines = []
for x, record in enumerate(records):
    description_lines.append(str(record.description) + "\n")
    if x < 5:
        print("Id: ", record.id, " Desc: ", record.description)
text = "".join(description_lines)

# remove special characters
text = textMiningService.removeSpecialCharacters(text)
tokenized_text = textMiningService.getSentenceTokenize(text)
print(tokenized_text)
def test():
    """Fetch the hard-coded record 6842453594 and return its JSON form.

    The first row from ``PropertyDAO().getRecord`` is turned into a
    ``Property`` from its first four columns.  Errors are not handled here
    and propagate to the caller.
    """
    first_row = PropertyDAO().getRecord(6842453594)[0]
    loaded = Property(first_row[0], first_row[1], first_row[2], first_row[3])
    return loaded.getJson()
def tryGetBedroomFromDescription(self, property: "Property"):
    """Guess the bedroom count from the title and description text.

    The lower-cased title and description are joined and split on single
    spaces.  The first rule that produces a value wins:

    1. A word that is exactly ``<n>bd``, ``<n>-br`` or ``<n>bed`` for n in
       1..5 gives ``n``.
    2. A bedroom word (``bedroom``, ``br``, ... or any word containing
       ``bed``/``bd``) preceded by a number word (``one``..``five``) or a
       numeric token in the range 0..7 gives that number.
    3. The word ``studio`` or the text ``bachelor ``/``bachlor `` gives 1.
    4. The first ``<digits>bd`` occurrence anywhere in the text gives that
       number as a float when it is in the range 0..7.

    On success ``setRooms(value)`` and ``setUpdate()`` are called.

    Args:
        property: Object exposing ``title``, ``description``, ``setRooms``
            and ``setUpdate``.

    Returns:
        The same ``property`` object, updated or not.
    """
    number_words = ['one', 'two', 'three', 'four', 'five']
    # Glued tokens such as "2bd", "2-br", "2bed" mapped to their count.
    exact_tokens = {
        str(count) + suffix: count
        for suffix in ('bd', '-br', 'bed')
        for count in range(1, 6)
    }
    bedroom_words = ('bedroom', 'bedrooms', 'bed', 'br', 'brm', 'bdrm', 'bdr')

    desc = str(property.title).lower() + ' ' + str(
        property.description).lower()
    words = desc.split(' ')

    for i, word in enumerate(words):
        if word in exact_tokens:
            property.setRooms(exact_tokens[word])
            property.setUpdate()
            return property

        # The count is expected in the word *before* the bedroom word.  At
        # i == 0 there is no previous word; indexing i - 1 would silently
        # wrap around to the last word of the text.
        if i > 0 and (word in bedroom_words or 'bed' in word
                      or 'bd' in word):
            candidate = words[i - 1].strip()
            if candidate in number_words:
                candidate = number_words.index(candidate) + 1
            try:
                rooms = int(candidate)
            except ValueError:
                try:
                    rooms = float(candidate)
                except ValueError:
                    rooms = -1
            # A larger number is probably not a bedroom count.
            if 0 <= rooms <= 7:
                property.setRooms(rooms)
                property.setUpdate()
                return property

    if 'studio' in words or 'bachelor ' in desc or 'bachlor ' in desc:
        property.setRooms(1)
        property.setUpdate()
        return property

    # Last resort: a glued "<digits>bd" inside a longer token, e.g. "(2bd)".
    found = re.search(r"\d{1,5}[b][d]", desc)
    if found:
        rooms = float(found.group().replace('bd', '').strip())
        if 0 <= rooms <= 7:
            property.setRooms(rooms)
            property.setUpdate()
            return property
    return property
def tryGetBathFromDescription(self, property: "Property"):
    """Guess the bathroom count from characteristics, title and description.

    The three fields are lower-cased and joined.  Literal ``\\xc2`` and
    ``\\xa0`` sequences and ``+``, ``/``, ``-`` become spaces, the filler
    words ``full`` and ``private`` are dropped, and runs of spaces are
    collapsed so that "2 full bath" reads as "2 bath".  The first rule that
    produces a value wins:

    1. The text ``one and half ba`` gives 1.5.
    2. A glued ``<digits>ba`` or ``<d>.<digits>ba`` occurrence gives that
       number when it is in the range (0, 7].
    3. A label word (``bath:``, ``bathroom:``, ...) followed by a number
       word or numeric token in range gives that number.
    4. A word containing ``bath`` or ``bths`` preceded by a number word or
       numeric token in range gives that number (``,`` is read as a decimal
       point here).
    5. Any mention of ``bath`` at all gives 1.

    On success ``setBath(value)`` and ``setUpdate()`` are called.

    Args:
        property: Object exposing ``characteristics``, ``title``,
            ``description``, ``setBath`` and ``setUpdate``.

    Returns:
        The same ``property`` object, updated or not.
    """
    number_words = ['one', 'two', 'three', 'four', 'five']
    label_words = ('bath:', 'bathroom:', 'bathrooms:', 'bathroom(s):')
    bath_fragments = ('bath', 'bths')

    def to_count(token):
        """Return the count encoded by ``token``, or None if unusable."""
        if token in number_words:
            return float(number_words.index(token) + 1)
        try:
            value = float(token)
        except ValueError:
            return None
        return value if 0 < value <= 7 else None

    desc = str(property.characteristics).lower() + ' ' + str(
        property.title).lower() + ' ' + str(property.description).lower()
    desc = desc.replace('\\xc2', ' ').replace('\\xa0', ' ')  # clean dirty
    for separator in ('+', '/', '-'):
        desc = desc.replace(separator, ' ')
    # Drop words that sit between the number and the bathroom word, then
    # collapse the doubled spaces they leave behind so the number ends up
    # directly before the bathroom word again.
    desc = desc.replace('full', '').replace('private', '')
    desc = re.sub(r' {2,}', ' ', desc)
    words = desc.split(' ')

    if 'one and half ba' in desc:
        property.setBath(1.5)
        property.setUpdate()
        return property

    found = re.search(r"\d{1,5}ba|\d[.]\d{1,5}ba", desc)
    if found:
        count = to_count(found.group().replace('ba', '').strip())
        if count is not None:
            property.setBath(count)
            property.setUpdate()
            return property

    for i, word in enumerate(words):
        # Label form: the number is in the NEXT word.  A label at the very
        # end of the text has no next word and is skipped.
        if word in label_words and i + 1 < len(words):
            token = words[i + 1].strip().replace('+', '').replace(',', '')
            count = to_count(token)
            if count is not None:
                property.setBath(count)
                property.setUpdate()
                return property

        # Free-text form: the number is in the PREVIOUS word.  At i == 0
        # there is none; indexing i - 1 would wrap to the last word.
        if i > 0 and any(fragment in word for fragment in bath_fragments):
            token = words[i - 1].strip().replace('+', '').replace(
                ',', '.').replace('/', '')
            count = to_count(token)
            if count is not None:
                property.setBath(count)
                property.setUpdate()
                return property

    if 'bath' in desc:
        property.setBath(1)
        property.setUpdate()
        return property
    return property
def tryGetSizeFromDescription(self, property: "Property"):
    """Guess the floor size from characteristics, title and description.

    The three fields are lower-cased, joined and split on single spaces.
    The first rule that produces a value of at least 100 wins:

    1. A unit word (``sqft``, ``sq.ft.``, ``sf``, ...) preceded by a numeric
       token gives that number.
    2. A label word (``sq/ft:`` or ``footage:``) followed by a numeric token
       gives that number.
    3. The first ``<digits><any char>sq`` occurrence in the text, or failing
       that ``<digits><any char>ft``, gives that number as a float.

    For rules 1 and 2 the token has ``+`` and ``,`` removed and is read as
    an int when possible, otherwise as a float.  On success
    ``setSize(value)`` and ``setUpdate()`` are called.

    Args:
        property: Object exposing ``characteristics``, ``title``,
            ``description``, ``setSize`` and ``setUpdate``.

    Returns:
        The same ``property`` object, updated or not.
    """
    unit_words = {
        'square', 'sqft', 'sq.', 'sqt', 'sqf', 'sqft)', 'sq.ft.', 'sqft.',
        'sq', 'sqft,', 'sf', 'sq.ft', 'sq.ft.,', 'sqft).', 'sq/ft',
    }
    label_words = ('sq/ft:', 'footage:')

    def to_size(token):
        """Return the size encoded by ``token``, or None if unusable."""
        cleaned = token.strip().replace('+', '').replace(',', '')
        try:
            value = int(cleaned)
        except ValueError:
            try:
                value = float(cleaned)
            except ValueError:
                return None
        # Anything below 100 is assumed not to be a floor size.
        return value if value >= 100 else None

    desc = str(property.characteristics).lower() + ' ' + str(
        property.title).lower() + ' ' + str(property.description).lower()
    words = desc.split(' ')

    for i, word in enumerate(words):
        # Unit form: the number is in the PREVIOUS word.  At i == 0 there is
        # none; indexing i - 1 would wrap to the last word of the text.
        if i > 0 and word in unit_words:
            size = to_size(words[i - 1])
            if size is not None:
                property.setSize(size)
                property.setUpdate()
                return property

        # Label form: the number is in the NEXT word.  A label at the very
        # end of the text has no next word and is skipped.
        if word in label_words and i + 1 < len(words):
            size = to_size(words[i + 1])
            if size is not None:
                property.setSize(size)
                property.setUpdate()
                return property

    # Last resort: digits glued to (or one character away from) "sq"/"ft".
    found = re.search(r"\d{1,5}.sq", desc)
    if not found:
        found = re.search(r"\d{1,5}.ft", desc)
    if found:
        token = found.group().replace('sq', '').replace('ft', '').strip()
        try:
            size = float(token)
        except ValueError:
            return property
        if size >= 100:
            property.setSize(size)
            property.setUpdate()
    return property
def packages_json_generate():
    """Stream the repository's package list as chunks of one JSON document.

    The generator yields, in order:

    * the document header containing ``REPO_NAME``;
    * every package whose last update succeeded within the past 24 hours,
      serialised with ``json_dump_package``;
    * every package that needs refreshing (never updated, successful but
      older than 24 hours, or failed and older than 4 hours), each yielded
      as soon as its ``update_package`` task completes;
    * the footer with the ``last_updated`` timestamp.

    Update failures are logged and the affected package is left out.  The
    comma between entries is written only once a previous entry has really
    been emitted, so a failed or empty first update cannot produce a leading
    comma.  The event loop is closed even when the consumer abandons the
    generator early or an unexpected error occurs.

    Yields:
        str: consecutive fragments of the JSON document.
    """
    yield '{{"name":"{}","packages":['.format(REPO_NAME)

    # Empty until the first package has been written, "," afterwards.
    separator = ""

    cached_packages = db_session.query(Package) \
        .filter(Package.last_updated.isnot(None),
                Package.last_update_successful,
                Package.last_updated >= datetime.utcnow() - timedelta(hours=24)) \
        .options(load_only(Package.owner, Package.name, Package.description,
                           Package.filename, Package.date, Package.version,
                           Package.download_url, Package.homepage))
    for package in cached_packages:
        yield separator + json_dump_package(package)
        separator = ","

    update_packages = db_session.query(Package) \
        .filter(or_(Package.last_updated.is_(None),
                    and_(Package.last_update_successful,
                         Package.last_updated < datetime.utcnow() - timedelta(hours=24)),
                    and_(not_(Package.last_update_successful),
                         Package.last_updated < datetime.utcnow() - timedelta(hours=4)))) \
        .options(load_only(Package.owner, Package.repo, Package.path,
                           Package.ptype, Package.date))

    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        update_tasks = [
            asyncio.ensure_future(update_package(package))
            for package in update_packages
        ]
        for update_task in asyncio.as_completed(update_tasks):
            try:
                updated_package = loop.run_until_complete(update_task)
            except Exception as ex:
                LOGGER.error(ex)
                LOGGER.debug(traceback.format_exc())
                continue
            if updated_package:
                yield separator + json_dump_package(updated_package)
                separator = ","
    finally:
        loop.close()

    if update_tasks:
        # At least one refresh was attempted: stamp the run time.
        last_updated_prop = Property("last_updated",
                                     date_val=datetime.utcnow())
        last_updated_prop = db_session.merge(last_updated_prop)
        db_session.commit()
        last_updated = last_updated_prop.date_val
    else:
        last_updated = db_session.query(Property.date_val).filter(
            Property.identifier == "last_updated").scalar()

    yield '],"last_updated":"{}"}}'.format(
        last_updated.isoformat() if last_updated else "")
from model.Property import Property
from service.TextMiningService import TextMiningService

# Exploratory script: load up to 50 records, preview the first five and
# print sentence and word tokenisations of all descriptions joined together.
# NOTE(review): PropertyDAO is used below but its import is not visible in
# this part of the file - confirm it is imported above.
propertyDao = PropertyDAO()
textMiningService = TextMiningService()

rows = propertyDao.getRecords(50)
# Alternative kept for debugging a single record:
# rows = propertyDao.getRecord(6829944535);
# print("Records: ", len(rows))

# '_id' is used as the id, 'house_description' (UTF-8 encoded) as the text.
records = [
    Property(row['_id'], row['house_description'].encode("utf-8"))
    for row in rows
]

print("\nShow me the first 5 records: ", "\n")
description_lines = []
for x, record in enumerate(records):
    description_lines.append(str(record.description) + "\n")
    if x < 5:
        print("Id: ", record.id, " Desc: ", record.description)
text = "".join(description_lines)

# remove special characters
text = textMiningService.removeSpecialCharacters(text)

tokenized_text = textMiningService.getSentenceTokenize(text)
print(tokenized_text)

tokenized_word = textMiningService.getWordTokenize(text)
print(tokenized_word)
from db.postgresl import PropertyDAO from model.Property import Property propertyDao = PropertyDAO() propertyService = PropertyService() sklearnService = SkLearnService() textMiningService = TextMiningService() rows = propertyDao.getRecordsWithNoLocation() print("Records: ", len(rows)) records = [] for row in rows: records.append( Property(row[0], row[1].encode("utf-8"), row[2], row[3], row[4])) size = len(rows) count = 0 result = defaultdict(list) for property in records: property = propertyService.populateRoomSize(property) sentences = propertyService.getSentences(str(property.description)) property.setLocation( propertyService.getLocationFromSentences(sentences, property.link)) if len(property.location) > 0: result[property.location].append(property.id) print(property.id, ' - location: ', property.location) #propertyDao.updateRecord(property) count += 1 try: