# assumes module-level imports of json, threading, tangelo and the project's
# db / factory / helper modules (defined above this section)


def upload_file(*args, **kwargs):
    # create a new domain from an uploaded CSV file of (type,value) pairs
    domain_content_connector = factory.get_entity_data_connector()
    try:
        domain_file = kwargs.get("file_upload")
        domain_name = kwargs.get("name")
        domain_description = kwargs.get("description")
        if not db.domain_exists(domain_name):
            if domain_file is not None:
                tangelo.log("read domain file")
                domain_file_lines = domain_file.file.readlines()
                domain_file_lines = map(lambda x: x.strip().replace('\0', ''), domain_file_lines)
                db.add_new_domain(domain_name, domain_description)
                rowkeys = []
                for line in domain_file_lines:
                    i = line.index(',')  # split on the first comma
                    type = line[:i]
                    value = line[i + 1:]
                    if type[0] == '"' and type[-1] == '"':
                        type = type[1:-1]
                    if value[0] == '"' and value[-1] == '"':
                        value = value[1:-1]
                    rowkeys.append(domain_name + '\0' + type + '\0' + value)
                result = domain_content_connector.add_new_domain_items(rowkeys)
                return json.dumps(dict(success=result))
            else:
                return json.dumps(dict(success=False))
        else:
            return json.dumps(dict(success=False))
    finally:
        domain_content_connector.close()

def delete_domain(domain_name):
    if db.domain_exists(domain_name):
        domain_content_connector = factory.get_entity_data_connector()
        db.remove_domain(domain_name)
        domain_content_connector.delete_domain_items(domain_name)
        return json.dumps(dict(success=True))
    return json.dumps(dict(success=False))

def upload_database(domain_name, domain_description):
    if not db.domain_exists(domain_name):
        db.add_new_domain(domain_name, domain_description)
        kwargs = dict(domain_name=domain_name, domain_description=domain_description)
        database_upload_thread = threading.Thread(target=upload_database_threaded, kwargs=kwargs)
        database_upload_thread.daemon = True
        database_upload_thread.start()
        return json.dumps(dict(success=True))
    return json.dumps(dict(success=False))

def delete_domain(*args, **kwargs):
    domain_name = kwargs.get("domain_name")
    # debug aid: log the names of the incoming keyword arguments
    for key in kwargs.keys():
        tangelo.log(key)
    if db.domain_exists(domain_name):
        domain_content_connector = factory.get_entity_data_connector()
        db.remove_domain(domain_name)
        domain_content_connector.delete_domain_items(domain_name)
        return json.dumps(dict(success=True))
    else:
        return json.dumps(dict(success=False))

def upload_database(*args, **kwargs):
    domain_name = kwargs.get("domain_name")
    domain_description = kwargs.get("domain_description")
    if not db.domain_exists(domain_name):
        db.add_new_domain(domain_name, domain_description)
        database_upload_thread = threading.Thread(target=upload_database_threaded, kwargs=kwargs)
        database_upload_thread.daemon = True
        database_upload_thread.start()
        return {'success': True}
    else:
        return {'success': False}

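# Sketch (illustration only, not used by the handlers above): the fire-and-forget
# daemon-thread pattern that upload_database relies on, shown with a hypothetical
# stand-in target so it is self-contained. The real target, upload_database_threaded,
# is assumed to be defined elsewhere in this module.
def _example_background_load(**kwargs):
    def _load(domain_name=None, domain_description=None):
        pass  # the long-running bulk load would happen here

    worker = threading.Thread(target=_load, kwargs=kwargs)
    worker.daemon = True   # daemon thread: the handler returns without waiting
    worker.start()
    return worker
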
def get(domain, trail, stars, newdomain):
    org = helper.get_org().upper()
    if not db.domain_exists(newdomain):
        db.add_new_domain(newdomain, 'auto generated domain from trail: ' + trail)

    features = set([])
    url_set = set([])
    stars = int(stars)

    # get all starred urls for the trail
    for (url, rank) in db.getRankedUrls(org, trail, domain):
        url_set.add(url)

    # with no star threshold, include every url on the browse path
    if stars < 1:
        urls = db.getBrowsePathUrls(org, trail)
        for url in urls:
            url_set.add(url)

    # get the list of invalid entities for the domain
    markedEntities = set([])
    for (type, value) in db.get_marked_entities_for_domain(org, domain):
        markedEntities.add(value)

    # for each url get all extracted entities
    entity_data_connector = factory.get_entity_data_connector()
    all_entities = entity_data_connector.get_extracted_entities_from_urls(url_set)
    for url, featureDict in all_entities.iteritems():
        for type, values in featureDict.iteritems():
            type = type.replace(',', ' ')
            for value in values:
                if value not in markedEntities:
                    value = value.replace(',', ' ')
                    features.add(type + "\0" + value)

    # for each url get any manually extracted entities
    for url in url_set:
        for featureObj in db.get_feedback_entities(org, domain, url):
            type = featureObj['type'].replace(',', ' ')
            value = featureObj['value'].replace(',', ' ')
            features.add(type + "\0" + value)

    entity_data_connector.add_new_domain_items(map(lambda x: newdomain + '\0' + x, features))

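# Note (illustration only): each feature collected by get() is stored as
# "type\0value", and the final add_new_domain_items call prefixes the new
# domain name, yielding the same "domain\0type\0value" row-key layout that
# upload_file below builds from an uploaded CSV. Sample values are hypothetical,
# e.g. "phone\0555-1212" becomes "mytrail_domain\0phone\0555-1212".
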
def upload_file(file_upload, name, description):
    tangelo.log("Loading new domain: " + name)
    domain_content_connector = factory.get_entity_data_connector()
    try:
        if not db.domain_exists(name):
            if file_upload is not None:
                domain_file_lines = file_upload.file.readlines()
                domain_file_lines = map(lambda x: x.strip().replace('\0', ''), domain_file_lines)
                db.add_new_domain(name, description)
                rowkeys = []
                for line in domain_file_lines:
                    i = line.index(',')  # split on the first comma
                    type = line[:i]
                    value = line[i + 1:]
                    if type[0] == '"' and type[-1] == '"':
                        type = type[1:-1]
                    if value[0] == '"' and value[-1] == '"':
                        value = value[1:-1]
                    rowkeys.append("%s\0%s\0%s" % (name, type, value))
                result = domain_content_connector.add_new_domain_items(rowkeys)
                return json.dumps(dict(success=result))
            return json.dumps(dict(success=False))
        return json.dumps(dict(success=False))
    finally:
        domain_content_connector.close()
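

# Sketch (illustration only): how one CSV line from an uploaded domain file maps
# to the "domain\0type\0value" row key built by upload_file above. The helper
# name and the sample values are hypothetical.
def _example_rowkey_from_csv_line(domain_name, line):
    i = line.index(',')                    # split on the first comma only
    entity_type, entity_value = line[:i], line[i + 1:]
    if entity_type.startswith('"') and entity_type.endswith('"'):
        entity_type = entity_type[1:-1]    # strip surrounding quotes
    if entity_value.startswith('"') and entity_value.endswith('"'):
        entity_value = entity_value[1:-1]
    return "%s\0%s\0%s" % (domain_name, entity_type, entity_value)

# _example_rowkey_from_csv_line("acme", '"email","bob@example.com"')
# -> 'acme\x00email\x00bob@example.com'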