def upload_file(*args, **kwargs):
    """Create a new domain from an uploaded file of ``type,value`` lines.

    The upload, the domain name and the domain description are read from the
    ``file_upload``, ``name`` and ``description`` keyword arguments.  Every
    non-blank line is split on its first comma into an entity type and a
    value; a surrounding pair of double quotes is removed from either part.
    One NUL-separated row key (domain name, type, value) is built per line
    and the full list is passed to the entity data connector.

    :param args: ignored
    :param kwargs: expects ``file_upload`` (object exposing ``.file``),
        ``name`` and ``description``
    :return: JSON string ``{"success": ...}``; the value is whatever
        ``add_new_domain_items`` returned, or ``false`` when the domain
        already exists or no file was supplied
    :raises ValueError: if a non-blank line contains no comma
    """
    domain_content_connector = factory.get_entity_data_connector()
    try:
        domain_file = kwargs.get("file_upload")
        domain_name = kwargs.get("name")
        domain_description = kwargs.get("description")

        if db.domain_exists(domain_name) or domain_file is None:
            return json.dumps(dict(success=False))

        tangelo.log("read domain file")
        rowkeys = []
        for raw_line in domain_file.file.readlines():
            line = raw_line.strip().replace('\0', '')
            if not line:
                # Blank lines (such as a trailing newline) carry no entity.
                continue
            # Split on the first comma only; the value may itself hold commas.
            entity_type, comma, value = line.partition(',')
            if not comma:
                raise ValueError("domain file line has no comma: %r" % line)
            # Slices rather than [0]/[-1] so an empty field cannot raise
            # IndexError.
            if entity_type[:1] == '"' and entity_type[-1:] == '"':
                entity_type = entity_type[1:-1]
            if value[:1] == '"' and value[-1:] == '"':
                value = value[1:-1]
            rowkeys.append(domain_name + '\0' + entity_type + '\0' + value)

        # Register the domain only once the whole file has parsed, so a
        # malformed upload does not leave an empty domain behind.
        db.add_new_domain(domain_name, domain_description)
        result = domain_content_connector.add_new_domain_items(rowkeys)
        return json.dumps(dict(success=result))
    finally:
        domain_content_connector.close()
def upload_database(domain_name, domain_description):
    """Register a new domain and start its database upload in the background.

    If the domain already exists nothing is started.  Otherwise the domain is
    added and ``upload_database_threaded`` is launched on a daemon thread with
    the domain name and description as keyword arguments.

    :param domain_name: name of the domain to create
    :param domain_description: description stored with the new domain
    :return: JSON string ``{"success": true}`` when the upload thread was
        started, ``{"success": false}`` when the domain already exists
    """
    if db.domain_exists(domain_name):
        return json.dumps({"success": False})

    db.add_new_domain(domain_name, domain_description)

    worker = threading.Thread(
        target=upload_database_threaded,
        kwargs={
            "domain_name": domain_name,
            "domain_description": domain_description,
        },
    )
    # Daemon thread: it does not keep the process alive on shutdown.
    worker.daemon = True
    worker.start()
    return json.dumps({"success": True})
def upload_database(*args, **kwargs):
    """Register a new domain and start its database upload in the background.

    The domain name and description are taken from the ``domain_name`` and
    ``domain_description`` keyword arguments.  When the domain does not exist
    yet it is added, and ``upload_database_threaded`` is started on a daemon
    thread with *all* received keyword arguments forwarded to it.

    :param args: ignored
    :param kwargs: expects ``domain_name`` and ``domain_description``
    :return: ``{'success': True}`` when the upload thread was started,
        ``{'success': False}`` when the domain already exists
    """
    name = kwargs.get("domain_name")
    description = kwargs.get("domain_description")

    if db.domain_exists(name):
        return {'success': False}

    db.add_new_domain(name, description)
    worker = threading.Thread(target=upload_database_threaded, kwargs=kwargs)
    # Daemon thread: it does not keep the process alive on shutdown.
    worker.daemon = True
    worker.start()
    return {'success': True}
def get(domain, trail, stars, newdomain):
    """Populate ``newdomain`` with the entities found on a trail's pages.

    The new domain is created if it does not exist yet.  URLs are collected
    from the trail's ranked URLs and, when ``stars`` is below 1, from the
    trail's browse path as well.  Extracted entities for those URLs are kept
    unless their value is marked for ``domain``; feedback entities for each
    URL are added without that filter.  Commas in types and values are
    replaced by spaces, and every resulting feature is stored under
    ``newdomain``.

    :param domain: domain used to look up ranked URLs, marked entities and
        feedback entities
    :param trail: trail whose URLs are harvested
    :param stars: value convertible with ``int()``; below 1 also includes
        the browse-path URLs
    :param newdomain: name of the domain that receives the features
    :return: None
    """
    org = helper.get_org().upper()

    if not db.domain_exists(newdomain):
        db.add_new_domain(newdomain, 'auto generated domain from trail: ' + trail)

    stars = int(stars)

    # Ranked URLs are always included; the rank itself is not used here.
    url_set = set(url for (url, _rank) in db.getRankedUrls(org, trail, domain))
    if stars < 1:
        url_set.update(db.getBrowsePathUrls(org, trail))

    # Values marked for the domain are excluded from the extracted entities.
    marked_values = set(
        value for (_type, value) in db.get_marked_entities_for_domain(org, domain))

    features = set()
    entity_data_connector = factory.get_entity_data_connector()
    try:
        # Entities extracted automatically from each URL.
        all_entities = entity_data_connector.get_extracted_entities_from_urls(url_set)
        for _url, feature_dict in all_entities.iteritems():
            for entity_type, values in feature_dict.iteritems():
                entity_type = entity_type.replace(',', ' ')
                for value in values:
                    # Membership is tested on the raw value, before commas
                    # are replaced.
                    if value not in marked_values:
                        features.add(entity_type + "\0" + value.replace(',', ' '))

        # Entities supplied through feedback for each URL.
        for url in url_set:
            for feature_obj in db.get_feedback_entities(org, domain, url):
                entity_type = feature_obj['type'].replace(',', ' ')
                value = feature_obj['value'].replace(',', ' ')
                features.add(entity_type + "\0" + value)

        entity_data_connector.add_new_domain_items(
            [newdomain + '\0' + feature for feature in features])
    finally:
        # Release the connector even when a lookup or the insert fails.
        entity_data_connector.close()
def get(domain, trail, stars, newdomain):
    """Populate ``newdomain`` with the entities found on a trail's pages.

    Creates ``newdomain`` when it is missing, gathers the trail's ranked URLs
    (plus its browse-path URLs when ``stars`` is below 1), and collects
    features from two sources: entities extracted from those URLs, skipping
    values marked for ``domain``, and feedback entities recorded for each
    URL.  Commas inside types and values are replaced by spaces before the
    features are stored under ``newdomain``.

    :param domain: domain used to look up ranked URLs, marked entities and
        feedback entities
    :param trail: trail whose URLs are harvested
    :param stars: value convertible with ``int()``; below 1 also includes
        the browse-path URLs
    :param newdomain: name of the domain that receives the features
    :return: None
    """
    org = helper.get_org().upper()

    if not db.domain_exists(newdomain):
        db.add_new_domain(newdomain, 'auto generated domain from trail: ' + trail)

    features = set()
    url_set = set()
    stars = int(stars)

    # Ranked URLs are always included; the rank itself is not used here.
    for (url, _rank) in db.getRankedUrls(org, trail, domain):
        url_set.add(url)

    if stars < 1:
        url_set.update(db.getBrowsePathUrls(org, trail))

    # Values marked for the domain are excluded from the extracted entities.
    marked_values = set()
    for (_marked_type, marked_value) in db.get_marked_entities_for_domain(org, domain):
        marked_values.add(marked_value)

    entity_data_connector = factory.get_entity_data_connector()
    try:
        # Entities extracted automatically from each URL.
        all_entities = entity_data_connector.get_extracted_entities_from_urls(
            url_set)
        for _url, feature_dict in all_entities.iteritems():
            for entity_type, values in feature_dict.iteritems():
                entity_type = entity_type.replace(',', ' ')
                for value in values:
                    # Membership is tested on the raw value, before commas
                    # are replaced.
                    if value in marked_values:
                        continue
                    features.add(entity_type + "\0" + value.replace(',', ' '))

        # Entities supplied through feedback for each URL.
        for url in url_set:
            for feature_obj in db.get_feedback_entities(org, domain, url):
                entity_type = feature_obj['type'].replace(',', ' ')
                value = feature_obj['value'].replace(',', ' ')
                features.add(entity_type + "\0" + value)

        entity_data_connector.add_new_domain_items(
            [newdomain + '\0' + feature for feature in features])
    finally:
        # Release the connector even when a lookup or the insert fails.
        entity_data_connector.close()
def set_user(user):
    """
    Store ``user`` in the CherryPy session, creating defaults when needed.

    When no teams are found for the user's email (treated as a first login),
    a private team is created with that email as the only entry of
    ``emails``, and an "Empty" domain is added to the new team.  The team
    list is then set on the user and the user is saved in the session under
    the ``'user'`` key.

    :param user: user object providing ``get_email()`` and ``set_teams()``
    :return: always ``True``
    """
    memberships = datawake_mysql.getTeams(email=user.get_email())

    if len(memberships) == 0:
        # create a team for the new user.
        owner_email = user.get_email()
        member_emails = [user.get_email()]
        new_team_id, new_team_name = datawake_mysql.createTeam(
            owner_email,
            'Auto generated private team.',
            emails=member_emails,
        )
        memberships = [(new_team_id, new_team_name)]
        datawake_mysql.add_new_domain(
            new_team_id,
            "Empty",
            "An empty domain. Created by default for each team.",
        )

    user.set_teams(memberships)
    cherrypy.session['user'] = user
    return True
def set_user(user):
    """
    Attach team information to ``user`` and save the user in the session.

    A user whose email has no teams yet is given an automatically created
    private team, listing only that email, together with an "Empty" default
    domain for the team.  Afterwards the teams are set on the user object and
    the user is stored in ``cherrypy.session['user']``.

    :param user: user object providing ``get_email()`` and ``set_teams()``
    :return: always ``True``
    """
    user_teams = datawake_mysql.getTeams(email=user.get_email())

    has_no_team = len(user_teams) == 0
    if has_no_team:
        # create a team for the new user.
        created_team = datawake_mysql.createTeam(
            user.get_email(),
            'Auto generated private team.',
            emails=[user.get_email()])
        (created_id, created_name) = created_team
        user_teams = [(created_id, created_name)]

        default_name = "Empty"
        default_description = "An empty domain. Created by default for each team."
        datawake_mysql.add_new_domain(created_id, default_name, default_description)

    user.set_teams(user_teams)
    cherrypy.session['user'] = user
    return True
def loadDomain(name, description, filename): print "\nloading domain: ", name, "\ndescription: ", description, "\nfrom path: ", filename, "\n" # if db.domain_exists(name): # raise ValueError("Domain already exists: "+name) fobj = open(filename, "r") domain_content_connector = factory.get_entity_data_connector() try: cnt = 0 items = [] for row in fobj: row = row.strip() row = row.replace("\0", "") cnt = cnt + 1 i = row.index(",") attr = row[:i] value = row[i + 1 :] if attr[0] == '"' and attr[len(type) - 1] == '"': attr = attr[1:-1] if value[0] == '"' and value[len(value) - 1] == '"': value = value[1:-1] items.append(name + "\0" + attr + "\0" + value) if cnt % 1000 == 0: domain_content_connector.add_new_domain_items(items) items = [] if cnt % 1000 == 0: print "added: ", cnt if len(items) > 0: domain_content_connector.add_new_domain_items(items) print "added new items cnt=", cnt db.add_new_domain(name, description) finally: fobj.close() domain_content_connector.close()
def upload_file(file_upload, name, description):
    """Create a new domain from an uploaded file of ``type,value`` lines.

    Every non-blank line of the upload is split on its first comma into an
    entity type and a value; a surrounding pair of double quotes is removed
    from either part.  One NUL-separated row key (domain name, type, value)
    is built per line and the full list is passed to the entity data
    connector.

    :param file_upload: uploaded file object exposing ``.file``, or ``None``
    :param name: name of the domain to create
    :param description: description stored with the new domain
    :return: JSON string ``{"success": ...}``; the value is whatever
        ``add_new_domain_items`` returned, or ``false`` when the domain
        already exists or no file was supplied
    :raises ValueError: if a non-blank line contains no comma
    """
    tangelo.log("Loading new domain: " + name)
    domain_content_connector = factory.get_entity_data_connector()
    try:
        if db.domain_exists(name) or file_upload is None:
            return json.dumps(dict(success=False))

        rowkeys = []
        for raw_line in file_upload.file.readlines():
            line = raw_line.strip().replace('\0', '')
            if not line:
                # Blank lines (such as a trailing newline) carry no entity.
                continue
            # Split on the first comma only; the value may itself hold commas.
            entity_type, comma, value = line.partition(',')
            if not comma:
                raise ValueError("domain file line has no comma: %r" % line)
            # Slices rather than [0]/[-1] so an empty field cannot raise
            # IndexError.
            if entity_type[:1] == '"' and entity_type[-1:] == '"':
                entity_type = entity_type[1:-1]
            if value[:1] == '"' and value[-1:] == '"':
                value = value[1:-1]
            rowkeys.append("%s\0%s\0%s" % (name, entity_type, value))

        # Register the domain only once the whole file has parsed, so a
        # malformed upload does not leave an empty domain behind.
        db.add_new_domain(name, description)
        result = domain_content_connector.add_new_domain_items(rowkeys)
        return json.dumps(dict(success=result))
    finally:
        domain_content_connector.close()
def loadDomain(name, description, filename): print '\nloading domain: ', name, '\ndescription: ', description, '\nfrom path: ', filename, '\n' #if db.domain_exists(name): # raise ValueError("Domain already exists: "+name) fobj = open(filename, 'r') domain_content_connector = factory.get_entity_data_connector() try: cnt = 0 items = [] for row in fobj: row = row.strip() row = row.replace("\0", '') cnt = cnt + 1 i = row.index(',') attr = row[:i] value = row[i + 1:] if attr[0] == '"' and attr[len(type) - 1] == '"': attr = attr[1:-1] if value[0] == '"' and value[len(value) - 1] == '"': value = value[1:-1] items.append(name + '\0' + attr + '\0' + value) if cnt % 1000 == 0: domain_content_connector.add_new_domain_items(items) items = [] if cnt % 1000 == 0: print 'added: ', cnt if len(items) > 0: domain_content_connector.add_new_domain_items(items) print 'added new items cnt=', cnt db.add_new_domain(name, description) finally: fobj.close() domain_content_connector.close()
def upload_file(team_id, name, description, features):
    """Create a domain for a team and store its features.

    The domain is added first; its features are then written through the
    entity data connector.  If writing the features fails, the new domain is
    removed again and the original exception is re-raised.

    :param team_id: team the new domain is added for
    :param name: name of the new domain
    :param description: description of the new domain
    :param features: items passed unchanged to ``add_new_domain_items``
    :return: JSON string with the new domain's ``id``, ``name`` and
        ``description``
    """
    # Acquire the connector before creating the domain, so that a failure to
    # obtain it cannot leave a domain without any items behind.
    domain_content_connector = factory.get_entity_data_connector()
    try:
        domain_id = db.add_new_domain(team_id, name, description)
        try:
            domain_content_connector.add_new_domain_items(domain_id, features)
        except:
            # Bare except on purpose: roll back on any failure, then
            # re-raise it unchanged.
            db.remove_domain(domain_id)
            raise
    finally:
        domain_content_connector.close()

    newdomain = dict(id=domain_id, name=name, description=description)
    return json.dumps(newdomain)
def upload_file(team_id, name, description, features):
    """Create a domain for a team and store its features.

    The domain is added first; its features are then written through the
    entity data connector.  If writing the features fails, the new domain is
    removed again and the original exception is re-raised.

    :param team_id: team the new domain is added for
    :param name: name of the new domain
    :param description: description of the new domain
    :param features: items passed unchanged to ``add_new_domain_items``
    :return: JSON string with the new domain's ``id``, ``name`` and
        ``description``
    """
    # Acquire the connector before creating the domain, so that a failure to
    # obtain it cannot leave a domain without any items behind.
    domain_content_connector = factory.get_entity_data_connector()
    try:
        domain_id = db.add_new_domain(team_id, name, description)
        try:
            domain_content_connector.add_new_domain_items(domain_id, features)
        except:
            # Bare except on purpose: roll back on any failure, then
            # re-raise it unchanged.
            db.remove_domain(domain_id)
            raise
    finally:
        domain_content_connector.close()

    newdomain = dict(id=domain_id, name=name, description=description)
    return json.dumps(newdomain)
def add_team(name, description=''):
    """Create a team for the current user together with its default domain.

    The team is added with the current user's email, after which an "Empty"
    domain is added for the new team's ``id``.

    :param name: name of the team
    :param description: optional team description
    :return: JSON string of the team as returned by ``db.addTeam``
    """
    creator_email = helper.get_user().get_email()
    new_team = db.addTeam(name, description, creator_email)

    default_name = "Empty"
    default_description = "An empty domain. Created by default for each team."
    db.add_new_domain(new_team['id'], default_name, default_description)

    return json.dumps(new_team)
def add_team(name, description=""):
    """Add a team owned by the current user and give it an "Empty" domain.

    :param name: name of the team
    :param description: optional team description
    :return: JSON string of the team as returned by ``db.addTeam``
    """
    current_user = helper.get_user()
    owner_email = current_user.get_email()
    created = db.addTeam(name, description, owner_email)

    # Each team gets a default domain, added for the team's "id".
    team_id = created["id"]
    db.add_new_domain(
        team_id,
        "Empty",
        "An empty domain. Created by default for each team.",
    )
    return json.dumps(created)