Example #1
0
def upload_file(*args, **kwargs):
    """Create a new domain from an uploaded file of ``type,value`` lines.

    Keyword arguments read from ``kwargs``:
        file_upload: upload object whose ``.file`` attribute supports
            ``readlines()``; may be missing / ``None``.
        name: name of the domain to create.
        description: description stored with the new domain.

    Each line is stripped, has NUL characters removed, and is split on its
    first comma into a type and a value.  A field that both starts and ends
    with a double quote has those quotes removed.  The resulting row keys have
    the form ``<name>\\0<type>\\0<value>``.

    Returns:
        A JSON string ``{"success": ...}``.  ``success`` is ``False`` when the
        domain already exists or no file was supplied; otherwise it is the
        value returned by the connector's ``add_new_domain_items``.

    Raises:
        ValueError: if a line contains no comma.  Because parsing happens
            before the domain is registered, a malformed file no longer
            leaves an empty domain behind.
    """
    domain_content_connector = factory.get_entity_data_connector()
    try:
        domain_file = kwargs.get("file_upload")
        domain_name = kwargs.get("name")
        domain_description = kwargs.get("description")
        if db.domain_exists(domain_name) or domain_file is None:
            return json.dumps(dict(success=False))

        tangelo.log("read domain file")
        rowkeys = []
        for raw_line in domain_file.file.readlines():
            line = raw_line.strip().replace('\0', '')
            comma = line.index(',')   # split on the first comma
            entity_type = line[:comma]
            value = line[comma + 1:]
            # startswith/endswith are safe on empty fields, where indexing
            # with [0] would raise IndexError.
            if entity_type.startswith('"') and entity_type.endswith('"'):
                entity_type = entity_type[1:-1]
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            rowkeys.append(domain_name + '\0' + entity_type + '\0' + value)

        # Register the domain only after every line parsed successfully, so a
        # bad upload does not block a later retry under the same name.
        db.add_new_domain(domain_name, domain_description)
        result = domain_content_connector.add_new_domain_items(rowkeys)
        return json.dumps(dict(success=result))
    finally:
        domain_content_connector.close()
def upload_database(domain_name, domain_description):
    """Register a new domain and start its database upload in the background.

    When ``domain_name`` does not exist yet, the domain is added and
    ``upload_database_threaded`` is started on a daemon thread with the
    domain name and description as keyword arguments.

    Returns:
        A JSON string ``{"success": true}`` when the upload thread was
        started, or ``{"success": false}`` when the domain already exists.
    """
    if db.domain_exists(domain_name):
        return json.dumps(dict(success=False))

    db.add_new_domain(domain_name, domain_description)
    worker = threading.Thread(
        target=upload_database_threaded,
        kwargs={
            'domain_name': domain_name,
            'domain_description': domain_description,
        },
    )
    # Daemon thread: it does not keep the process alive on shutdown.
    worker.daemon = True
    worker.start()
    return json.dumps(dict(success=True))
Example #3
0
def upload_database(*args, **kwargs):
    """Register a new domain and start its database upload in the background.

    Keyword arguments read from ``kwargs``:
        domain_name: name of the domain to create.
        domain_description: description stored with the new domain.

    The complete ``kwargs`` dict is forwarded unchanged to
    ``upload_database_threaded``, which runs on a daemon thread.

    Returns:
        ``{'success': True}`` when the upload thread was started, or
        ``{'success': False}`` when the domain already exists.
    """
    name = kwargs.get("domain_name")
    description = kwargs.get("domain_description")

    if db.domain_exists(name):
        return {'success': False}

    db.add_new_domain(name, description)
    worker = threading.Thread(target=upload_database_threaded, kwargs=kwargs)
    worker.daemon = True
    worker.start()
    return {'success': True}
Example #4
0
def get(domain, trail, stars, newdomain):
    """Populate ``newdomain`` with the entities found on a trail's URLs.

    Args:
        domain: domain used to look up ranked URLs, marked entities and
            feedback entities.
        trail: trail whose URLs are collected; also used in the description
            of an auto-created domain.
        stars: converted with ``int()``; when the result is below 1 the
            trail's browse-path URLs are included in addition to the ranked
            URLs.
        newdomain: domain that receives the collected features.  It is
            created when it does not exist yet.

    Features are stored as ``<newdomain>\\0<type>\\0<value>`` with commas in
    the type and value replaced by spaces.  Extracted entities whose value
    is among the domain's marked entities are skipped.

    Returns:
        None.
    """
    org = helper.get_org().upper()

    if not db.domain_exists(newdomain):
        db.add_new_domain(newdomain, 'auto generated domain from trail: ' + trail)

    stars = int(stars)

    # Collect the ranked urls for the trail, plus the browse path when the
    # star threshold is below 1.
    url_set = set(url for (url, _rank) in db.getRankedUrls(org, trail, domain))
    if stars < 1:
        url_set.update(db.getBrowsePathUrls(org, trail))

    # Values marked for the domain; these are excluded from the extracted
    # entities below.
    marked_entities = set(
        value for (_marked_type, value) in db.get_marked_entities_for_domain(org, domain)
    )

    features = set()
    entity_data_connector = factory.get_entity_data_connector()
    try:
        # for each url get all extracted entities
        all_entities = entity_data_connector.get_extracted_entities_from_urls(url_set)
        for _url, feature_dict in all_entities.iteritems():
            for entity_type, values in feature_dict.iteritems():
                entity_type = entity_type.replace(',', ' ')
                for value in values:
                    # Membership is tested on the raw value, before commas
                    # are replaced.
                    if value not in marked_entities:
                        features.add(entity_type + "\0" + value.replace(',', ' '))

        # for each url get any manually extracted entities
        for url in url_set:
            for feature_obj in db.get_feedback_entities(org, domain, url):
                entity_type = feature_obj['type'].replace(',', ' ')
                value = feature_obj['value'].replace(',', ' ')
                features.add(entity_type + "\0" + value)

        entity_data_connector.add_new_domain_items(
            [newdomain + '\0' + feature for feature in features]
        )
    finally:
        # Release the connector even when a lookup or the insert fails.
        entity_data_connector.close()
Example #5
0
def set_user(user):
    """
    Store the user object in the session together with its teams.
    If this user is logging in for the first time (no teams are found for the
    user's email), automatically create a private team and an empty domain.
    :param user: user object providing get_email() and set_teams()
    :return: True
    """
    memberships = datawake_mysql.getTeams(email=user.get_email())

    if len(memberships) == 0:
        # First login: create a team for the new user ...
        (new_team_id, new_team_name) = datawake_mysql.createTeam(
            user.get_email(),
            'Auto generated private team.',
            emails=[user.get_email()],
        )
        memberships = [(new_team_id, new_team_name)]
        # ... and give that team its default empty domain.
        datawake_mysql.add_new_domain(
            new_team_id,
            "Empty",
            "An empty domain. Created by default for each team.",
        )

    user.set_teams(memberships)
    cherrypy.session['user'] = user
    return True
def loadDomain(name, description, filename):
    """Load ``attribute,value`` rows from a file into domain ``name``.

    Args:
        name: domain name; used as the prefix of every stored item.
        description: description passed to ``db.add_new_domain``.
        filename: path of the text file to read, one row per line.

    Each row is stripped, has NUL characters removed, and is split on its
    first comma.  A field that both starts and ends with a double quote has
    those quotes removed.  Items have the form
    ``<name>\\0<attribute>\\0<value>`` and are sent to the entity data
    connector in batches of 1000.  The domain itself is registered only
    after all rows were added.

    Raises:
        ValueError: if a row contains no comma.  Batches already sent to the
            connector are not rolled back.
    """
    # Each print passes one pre-formatted string so the output is the same
    # under the Python 2 print statement and the Python 3 print function.
    print("\nloading domain:  %s \ndescription:  %s \nfrom path:  %s \n"
          % (name, description, filename))

    # NOTE: this function does not check whether the domain already exists;
    # items are added regardless.

    with open(filename, "r") as fobj:
        domain_content_connector = factory.get_entity_data_connector()
        try:
            cnt = 0
            items = []
            for row in fobj:
                row = row.strip().replace("\0", "")
                cnt += 1
                i = row.index(",")  # split on the first comma
                attr = row[:i]
                value = row[i + 1:]
                # The original tested attr[len(type) - 1], calling len() on
                # the builtin ``type``, which raises TypeError.
                # startswith/endswith also cope with empty fields.
                if attr.startswith('"') and attr.endswith('"'):
                    attr = attr[1:-1]
                if value.startswith('"') and value.endswith('"'):
                    value = value[1:-1]
                items.append(name + "\0" + attr + "\0" + value)
                if cnt % 1000 == 0:
                    # Flush a full batch and report progress.
                    domain_content_connector.add_new_domain_items(items)
                    items = []
                    print("added:  %s" % cnt)
            if items:
                domain_content_connector.add_new_domain_items(items)

            print("added new items cnt= %s" % cnt)

            db.add_new_domain(name, description)
        finally:
            domain_content_connector.close()
def upload_file(file_upload, name, description):
    """Create domain ``name`` from an uploaded file of ``type,value`` lines.

    Args:
        file_upload: upload object whose ``.file`` attribute supports
            ``readlines()``, or ``None`` when nothing was uploaded.
        name: name of the domain to create.
        description: description stored with the new domain.

    Each line is stripped, has NUL characters removed, and is split on its
    first comma into a type and a value.  A field that both starts and ends
    with a double quote has those quotes removed.  The resulting row keys have
    the form ``<name>\\0<type>\\0<value>``.

    Returns:
        A JSON string ``{"success": ...}``.  ``success`` is ``False`` when the
        domain already exists or ``file_upload`` is ``None``; otherwise it is
        the value returned by the connector's ``add_new_domain_items``.

    Raises:
        ValueError: if a line contains no comma.  Because parsing happens
            before the domain is registered, a malformed file no longer
            leaves an empty domain behind.
    """
    tangelo.log("Loading new domain: " + name)
    domain_content_connector = factory.get_entity_data_connector()
    try:
        if db.domain_exists(name) or file_upload is None:
            return json.dumps(dict(success=False))

        rowkeys = []
        for raw_line in file_upload.file.readlines():
            line = raw_line.strip().replace('\0', '')
            comma = line.index(',')  # split on the first comma
            entity_type = line[:comma]
            value = line[comma + 1:]
            # startswith/endswith are safe on empty fields, where indexing
            # with [0] would raise IndexError.
            if entity_type.startswith('"') and entity_type.endswith('"'):
                entity_type = entity_type[1:-1]
            if value.startswith('"') and value.endswith('"'):
                value = value[1:-1]
            rowkeys.append("%s\0%s\0%s" % (name, entity_type, value))

        # Register the domain only after every line parsed successfully, so a
        # bad upload does not block a later retry under the same name.
        db.add_new_domain(name, description)
        result = domain_content_connector.add_new_domain_items(rowkeys)
        return json.dumps(dict(success=result))
    finally:
        domain_content_connector.close()
Example #8
0
def upload_file(team_id, name, description, features):
    """Create a domain for a team and store its features.

    The domain is registered first; ``features`` are then handed to the
    entity data connector under the new domain id.  If storing the features
    raises, the freshly created domain is removed again and the exception
    propagates.  The connector is closed in every case.

    Returns:
        A JSON string with the new domain's ``id``, ``name`` and
        ``description``.
    """
    new_id = db.add_new_domain(team_id, name, description)

    connector = factory.get_entity_data_connector()
    try:
        try:
            connector.add_new_domain_items(new_id, features)
        except:
            # Roll back the domain registration, then let the error surface.
            db.remove_domain(new_id)
            raise
    finally:
        connector.close()

    return json.dumps(dict(id=new_id, name=name, description=description))
Example #9
0
def add_team(name, description=""):
    """Create a team owned by the current user, with a default empty domain.

    The team is added with the current user's email, then a domain named
    "Empty" is created for the new team's ``id``.

    Returns:
        The team returned by ``db.addTeam``, serialized as a JSON string.
    """
    current_user = helper.get_user()
    new_team = db.addTeam(name, description, current_user.get_email())
    db.add_new_domain(
        new_team["id"],
        "Empty",
        "An empty domain. Created by default for each team.",
    )
    return json.dumps(new_team)