Beispiel #1
0
    def add_gene_rnd(self, gene):
        """Splice ``gene`` into a randomly chosen edge of the genome.

        Add-node mutation (figure 3B of the NEAT paper): the picked edge
        ``source -> target`` is replaced by ``source -> gene`` and
        ``gene -> target``. Each new edge gets its own random weight and its
        own innovation number, stored under the ``'inN'`` edge attribute.
        """
        # innovation number for the first new edge (source -> gene)
        self.inovationNumber += 1
        picked = self.get_rand_edge()
        first_weight = self.getRnd()
        second_weight = self.getRnd()

        # picked[0][0] is the source node, picked[0][1] the target node
        source = picked[0][0]
        target = picked[0][1]

        # source -> gene
        self.genome.add_edge(source, gene, weight=first_weight)
        self.genome[source][gene]['inN'] = self.inovationNumber
        gene.add_conn(Conn(source, first_weight))

        # gene -> target, under the next innovation number
        self.inovationNumber += 1
        self.genome.add_edge(gene, target, weight=second_weight)
        self.genome[gene][target]['inN'] = self.inovationNumber
        target.add_conn(Conn(gene, second_weight))

        # drop the original direct edge, both in the graph and on the target
        # node (rm_conn takes the source node)
        self.genome.remove_edge(source, target)
        target.rm_conn(source)
Beispiel #2
0
def load_image(img_id):
    """Look up an image document by id and save its picture to a temp file.

    Args:
        img_id(str): id of the image

    Return:
        tuple: ``(file_path, link)``. ``file_path`` is the temp path handed to
        ``save_image``, or None when no document matched or an IndexError was
        raised while extracting/saving the image. ``link`` is the document's
        ``link`` field, or '' when no document matched.
    """
    saved_path = None
    page_link = ''
    with Conn(database=conf.MONGO_DB,
              host=conf.MONGO_IP,
              port=conf.MONGO_PORT,
              collection=conf.MONGO_COL) as col:
        record = col.find_one({'id': img_id})
        if record:
            page_link = record['link']
            try:
                response = requests.get(page_link)
                tree = html.fromstring(response.text)
                candidates = tree.xpath(
                    '//meta[@property="og:image"]/@content')
                # keep the URL up to its first '.jpg', dropping what follows
                image_url = candidates[0].split('.jpg')[0] + '.jpg'
                saved_path = conf.TEMP_PATH + img_id + '.jpg'
                save_image(image_url, saved_path)
            except IndexError:
                saved_path = None
    return saved_path, page_link
Beispiel #3
0
    def update_conns(self, graph_n=None):
        """Rebuild every gene's connections from the genome graph.

        Runs inside ``graph_n.as_default()``. For each node of the genome the
        stored connections are cleared and recreated: one ``Conn`` per
        predecessor, carrying the ``weight`` attribute of that edge.
        """
        # NOTE(review): the default graph_n=None cannot work here because
        # None has no as_default(); callers presumably always pass a graph.
        with graph_n.as_default():
            for node in self.genome.nodes_iter():
                node.clear_conns()

                for source in self.genome.predecessors(node):
                    edge_weight = self.genome[source][node]['weight']
                    node.add_conn(Conn(source, edge_weight))
Beispiel #4
0
    def add_genes(self, gene1, gene2, weight_v=1.0):
        """Connect ``gene1`` to ``gene2`` with a weighted edge (mutation).

        Bumps the innovation number, adds the edge ``gene1 -> gene2`` with
        weight ``weight_v`` (1.0 by default), registers the matching ``Conn``
        on ``gene2`` and stores the innovation number under the edge's
        ``'inN'`` attribute.
        """
        self.inovationNumber += 1

        self.genome.add_edge(gene1, gene2, weight=weight_v)
        # mirror the new weighted edge on the receiving gene
        gene2.add_conn(Conn(gene1, weight_v))

        # FIX IT (marker kept from the original author)
        new_edge = self.genome[gene1][gene2]
        new_edge['inN'] = self.inovationNumber
Beispiel #5
0
import sys
reload(sys)
sys.setdefaultencoding( "utf-8" )

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import jieba
from jieba import analyse
import MySQLdb
import numpy as np
from sklearn import metrics
from sklearn.svm import SVC
from conn import Conn

conn = Conn().getConnection()

def get_segment(all=False):
    cursor = conn.cursor()

    if True == all:
        # fetch the title and content of every article
        sql = "select id, title, content from CrawlPage"
    else:
        # fetch the title and content of the articles not yet segmented
        sql = "select id, title, content from CrawlPage where segment is null"
    cursor.execute(sql)

    for row in cursor.fetchall():
        print "cutting %s" % row[1]
        # segment the title and the content into words
Beispiel #6
0
def _teachError(message, code):
    """Return the error envelope ``(json, code)`` used by processTeach."""
    return jsonify({
        'status': {
            'message': message,
            'error': True
        },
        'data': None
    }), code


def _teachCleanup(paths):
    """Best-effort removal of temp files left over by an aborted request."""
    for path in paths:
        # a failed conversion may hand back something that is not a file
        if os.path.isfile(path):
            os.remove(path)


def _teachRecordTraining(con, model, modelID, text, sentiment, tags):
    """Train model on one text and store that training through con."""
    processedText = model.train(text=text,
                                sentiment=sentiment,
                                tags=tags,
                                returnProcessedText=True)
    con.addNewTraining(modelID=modelID,
                       tags=tags,
                       sentiment=sentiment,
                       processedText=processedText,
                       rawText=text)


def processTeach():
    """
    Teaches the model with new knowledge.

    Required params:\n
    -action = 'teach'\n
    -stoken\n
    -datatype => text or blob\n
    -data => raw text, a base64 blob or a list of base64 blobs\n
    -sentiment => 'positive', 'negative' or 'neutral'\n
    -tags => list of tags\n
    :return: A success message and 201. Errors return a message with 400
        (bad or missing input, no model) or 403 (bad token, action, datatype).
    """
    request.get_data()
    if request.json is None:
        return _teachError(
            "Incorrect header type. Header type should be application/json.",
            400)

    data = request.json

    # Every key below is read further down; report a missing one as a client
    # error instead of letting the lookup raise.
    for param in ('stoken', 'action', 'datatype', 'data', 'sentiment',
                  'tags'):
        if not isinstance(data, dict) or param not in data:
            return _teachError(
                'Missing required parameter \'' + param + '\'', 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != "teach":
        return _teachError('Wrong action. This endpoint is only for teach.',
                           403)

    if data['datatype'] not in ('text', 'blob'):
        return _teachError(
            'Wrong datatype. This endpoint only accepts raw text as \'text\' and pdf or txt files as \'blob\'.',
            403)

    tags = data['tags']
    sentiment = data['sentiment']

    # The sentiment and the model are validated for text AND blob requests,
    # before any temp file is written.
    if sentiment not in _labelAll:
        return _teachError("Error in sentiments.", 400)

    modelInfo = con.getModelInfo(str(userID))
    if modelInfo == [None, None]:
        return _teachError("No model created yet.", 400)
    modelID, modelName = modelInfo

    if data['datatype'] == 'text':
        # remove escape characters, if any
        txtdata = str(data['data']).replace("\\", "")

        model = Classifier(makeNewModel=False, modelName=modelName)
        _teachRecordTraining(con, model, modelID, txtdata, sentiment, tags)
        return jsonify({'status': RETURN_SUCCESS_STATUS, 'data': None}), 201

    # datatype == 'blob': one base64 file (str) or several (list)
    blobs = data['data']
    if not isinstance(blobs, (list, str)):
        return _teachError(
            'Data in the \'data\' field should be sent as a string or a list of items.',
            400)
    if isinstance(blobs, str):
        blobs = [blobs]  # a single file is handled as a one-item list

    files = []
    for item in blobs:
        filename = str(uuid.uuid4())  # generate temp file name
        # A falsy flag is treated as a failed conversion; retval is used as
        # the stored file's path otherwise.
        converted, retval = getFileBase64(item, filename, userID)

        # in case file received is not a text-based (.pdf/.txt) file
        if not retval.endswith(('pdf', 'txt')):
            _teachCleanup(files + [retval])
            return _teachError("Wrong format for data.", 400)

        if not converted:  # error during converting the file
            _teachCleanup(files + [retval])
            return _teachError(ERRORFILEEXTEN, 400)

        files.append(retval)

    model = Classifier(makeNewModel=False, modelName=modelName)

    # get text for each file
    for eachFile in files:
        txtdata = readTextFileContents(eachFile, return_metadata=False)
        os.remove(eachFile)  # remove temp file
        _teachRecordTraining(con, model, modelID, txtdata, sentiment, tags)

    return jsonify({'status': RETURN_SUCCESS_STATUS, 'data': None}), 201
Beispiel #7
0
def _askError(message, code):
    """Return the error envelope ``(json, code)`` used by processAsk."""
    return jsonify({
        'status': {
            'message': message,
            'error': True
        },
        'data': None
    }), code


def _askDiscardTempFile(path):
    """Remove path when it names an existing file; otherwise do nothing."""
    # a failed conversion may hand back something that is not a file
    if os.path.isfile(path):
        os.remove(path)


def processAsk():
    """
    Gets a prediction from the model.

    Required params:\n
    -action = 'ask'\n
    -stoken\n
    -datatype => text or blob\n
    -data => raw text, a base64 blob or a list of base64 blobs\n
    :return: The predicted sentiment, related tags and the original submitted
        data and 200. Errors return a message with 400 (bad or missing input,
        no model) or 403 (bad token, action, datatype).
    """
    request.get_data()
    if request.json is None:
        return _askError(
            "Incorrect header type. Header type should be application/json.",
            400)

    noMissingData, data = getRequiredParameters(request,
                                                stoken='',
                                                action='',
                                                datatype='',
                                                data='')

    if not noMissingData:
        # on failure, data holds the name of the missing parameter
        return _askError('Missing required parameter \'' + data + '\'', 400)

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != "ask":
        return _askError('Wrong action. This endpoint is only for ask.', 403)

    if data['datatype'] not in ('text', 'blob'):
        return _askError(
            'Wrong datatype. This endpoint only accepts raw text as \'text\' and pdf or txt files as \'blob\'.',
            403)

    # The model is required by every branch below, so check it once up front.
    modelInfo = con.getModelInfo(str(userID))
    if modelInfo == [None, None]:
        return _askError("No model created yet.", 400)
    modelID, modelName = modelInfo

    if data['datatype'] == 'text':
        # remove escape characters, if any
        txtdata = str(request.json['data']).replace("\\", "")

        # Only predict when the text is similar enough to at least one piece
        # of previously taught knowledge.
        availTraining = con.getAvailableTraining(modelID)
        ifPrevKnowSimilar = any(
            getCosineSimilarity(txtdata, eachTraining[1]) >=
            PREVIOUS_KNOWLEDGE_SIMILARITY_RATE
            for eachTraining in availTraining)

        if not ifPrevKnowSimilar:
            return jsonify({
                'status': RETURN_SUCCESS_STATUS,
                'data': {
                    'predictedSentiment': "Not available",
                    'suggested': "Not available",
                    'text': txtdata
                }
            }), 200

        model = Classifier(makeNewModel=False, modelName=modelName)
        predSentiment, retTags = model.predict(txtdata)

        return jsonify({
            'status': RETURN_SUCCESS_STATUS,
            'data': {
                'predictedSentiment': predSentiment,
                'suggested': retTags,
                'text': txtdata
            }
        }), 200

    # datatype == 'blob': one base64 file (str) or several (list)
    blobs = data['data']
    if not isinstance(blobs, (list, str)):
        return _askError(
            'Data in the \'data\' field should be sent as a string or a list of items.',
            400)

    multipleFiles = isinstance(blobs, list)
    model = Classifier(makeNewModel=False, modelName=modelName)

    predSentiment = list()
    retTags = list()
    for item in (blobs if multipleFiles else [blobs]):
        filename = str(uuid.uuid4())  # generate temp file name
        status, filepath = getFileBase64(item, filename)

        # in case file received is not a text-based (.pdf/.txt) file
        if not filepath.endswith(('pdf', 'txt')):
            _askDiscardTempFile(filepath)
            return _askError("Wrong format for data.", 400)

        if not status:  # error during converting the file
            _askDiscardTempFile(filepath)
            return _askError(ERRORFILEEXTEN, 400)

        textData = readTextFileContents(filepath)
        os.remove(filepath)  # remove temp file before predicting

        sent, tag = model.predict(textData)
        predSentiment.append(sent)
        retTags.append(tag)

    if not multipleFiles:
        # a single file answers with plain values instead of one-item lists
        predSentiment, retTags = predSentiment[0], retTags[0]

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'predictedSentiment': predSentiment,
            'suggested': retTags,
            'text': data['data']
        }
    }), 200
Beispiel #8
0
def login():
    """
    Checks user's credentials and if successful, returns a session token lasting 3 hours for the user.

    Required params:\n
    -action = 'login'\n
    -username\n
    -userpw\n

    Optional params:\n
    -hashed => truthy when userpw is already hashed (default False)\n

    :return: A session token and 201 if successful; an error message with
        400 (bad or missing input) or 403 (login failed) otherwise.
    """
    request.get_data()
    if request.json is None:
        return jsonify({
            'status': {
                'error':
                True,
                'message':
                "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }), 400

    chkMissingData, data = getRequiredParameters(request,
                                                 action='',
                                                 username='',
                                                 userpw='')

    if not chkMissingData:
        # on failure, data holds the name of the missing parameter
        return jsonify({
            'status': {
                'message': 'Missing required parameter \'' + data + '\'',
                'error': True
            },
            'data': None
        }), 400

    # assume password sent is always unhashed, unless specified; 'hashed' is
    # optional, so it is read with .get() to tolerate its absence
    data['hashed'] = request.json.get('hashed') or False

    con = Conn()
    sessionToken = generateSessionToken()

    if con.userLogin(data['username'], data['userpw'], sessionToken,
                     data['hashed']) == -1:
        return jsonify({
            'status': {
                'error': True,
                'message': "User login failed."
            },
            'data': None
        }), 403

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'token': sessionToken
        }
    }), 201
Beispiel #9
0
def getTags():
    """
    Gets suggested tags.

    Required params:\n
    -tags => list of tags, separated by a delimiter (default commas)
    -mid => model ID

    Optional params:\n
    -howMany => positive integer; anything else falls back to
        DEFAULT_RETURN_SUGGESTED_TAGS
    -delim => default comma, used to separate the tags
    :return: Suggested tags based on previous taught knowledge.
    """
    request.get_data()
    if request.args is None:
        return jsonify({
            'status': {
                'error': True,
                'message': "No data is received."
            },
            'data': None
        }), 400

    userTags = request.args.get("tags")
    modelID = request.args.get("mid")

    # Query-string values arrive as text: convert howMany to an int and fall
    # back to the default when it is absent, non-numeric or not positive.
    returnNumber = DEFAULT_RETURN_SUGGESTED_TAGS
    rawHowMany = request.args.get("howMany")
    if rawHowMany is not None:
        try:
            requested = int(rawHowMany)
        except ValueError:
            requested = 0
        if requested > 0:
            returnNumber = requested

    # an empty delimiter cannot be used to split, so it also means "default"
    delim = request.args.get("delim") or ","

    if userTags is None or modelID is None:
        if userTags is None and modelID is None:
            message = ("Incomplete arguments. Expected mid and tags but "
                       "found neither")
        else:
            message = (
                "Incomplete arguments. Expected mid and tags but only found " +
                ("usertags" if modelID is None else "modelID"))
        return jsonify({
            'status': {
                'error': True,
                'message': message
            },
            'data': None
        }), 400

    if isinstance(userTags, str):
        userTags = userTags.split(delim)
    elif not isinstance(userTags, list):
        return jsonify({
            'status': {
                'error':
                True,
                'message':
                "Wrong format for tags. Allowed formats are either string or lists in square brackets."
            },
            'data': None
        }), 400

    con = Conn()
    tagsFromDB = con.getTags(modelID=modelID)

    relatedTags = getSuggestedTags(userTags, tagsFromDB, returnNumber)

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            "tags": userTags,
            "mid": modelID,
            "suggested": relatedTags,
            "howMany": returnNumber
        }
    }), 200
Beispiel #10
0
def close(conn: Conn) -> None:
    """Send the closing control message, then close and clear conn's socket.

    The socket is closed and ``conn.socket`` reset to None even when sending
    the control message raises; that exception still propagates.
    """
    print("Closing")
    try:
        conn.send_control("fin = 1")
    finally:
        # Rebinding the local name ``conn`` would not affect the caller, so
        # only the connection's own socket attribute is cleared.
        if conn.socket is not None:
            conn.socket.close()
            conn.socket = None
Beispiel #11
0
    def __init__(self):
        """Create a ``Conn`` and keep what its ``conn()`` call returns.

        The result is stored as ``self.engine``.
        """
        self.engine = Conn().conn()
Beispiel #12
0
def dial(address: str):
    """Create a ``Conn``, connect it to ``address`` and return it.

    NOTE(review): the final check tests the ``connect`` attribute itself, not
    the outcome of the call above. If ``connect`` is an ordinary method it is
    always truthy and None is never returned - confirm the intent.
    """
    connection = Conn()
    connection.connect(address)
    return connection if connection.connect else None
Beispiel #13
0
def recv(conn: Conn, length: int) -> bytes:
    """Return the result of ``conn.recv(length)``."""
    received = conn.recv(length)
    return received
def _chat_user_info(conn):
    """Return the username/color/userid dict describing ``conn``."""
    return {
        'username': conn.username,
        'color': str(conn.color),
        'userid': conn.userId
    }


def _chat_is_named(conn):
    """Tell whether ``conn`` already carries username, userId and color."""
    return (hasattr(conn, 'username') and hasattr(conn, 'userId')
            and hasattr(conn, 'color'))


def _chat_handle_message(server_socket, main_conn, currentConn, sock, msg):
    """Act on one decoded client message: setname, action or chat content."""
    if msg.get("setname"):
        currentConn.username = msg["setname"]
        currentConn.color = "#%06x" % random.randint(0, 0xFFFFFF)
        currentConn.userId = random.randint(
            0, 0xFFFFFF)  # TODO: check if userId is unique

        welcomeMsg = {'msgtype': 2}
        welcomeMsg.update(_chat_user_info(currentConn))
        send_message(currentConn, json.dumps(welcomeMsg))

        # Announce the user that just picked a name, using the socket the
        # message arrived on (not the most recently accepted socket).
        broadcast(
            server_socket, sock,
            json.dumps(
                {"servermsg": currentConn.username + " entrou no chat."}))

        usersJson = {
            "users": [
                _chat_user_info(c) for c in currentConnections
                if _chat_is_named(c)
            ]
        }
        broadcast(server_socket, sock, json.dumps(usersJson))
    elif msg.get("action"):
        if msg["action"] == "showusers":
            usersJson = {
                "users": [
                    _chat_user_info(c) for c in currentConnections
                    if c != currentConn and c != main_conn
                    and _chat_is_named(c)
                ]
            }
            send_message(currentConn, json.dumps(usersJson))
    else:
        # there is something in the socket
        message = {'content': msg["content"]}
        message.update(_chat_user_info(currentConn))
        broadcast(server_socket, sock, json.dumps(message))


def _chat_drop(server_socket, currentConn, sock):
    """Forget a broken connection, announce the departure, close its socket."""
    if currentConn in currentConnections:
        currentConnections.remove(currentConn)

    try:
        # A client that never set a name was never announced, so there is
        # nothing to tell the others when it goes away.
        username = getattr(currentConn, 'username', None)
        if username is not None:
            broadcast(
                server_socket, sock,
                json.dumps(
                    {"servermsg": "Usuario " + str(username) + " saiu"}))
    finally:
        sock.close()


def chat_server():
    """Run the chat server loop on (HOST, PORT).

    Accepts new clients on the listening socket and, for every other readable
    socket, decodes one JSON message and handles it. A client whose socket
    returns no data, or whose message raises an exception while being
    handled, is dropped. The listening socket is closed when the loop exits.
    """
    server_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    server_socket.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
    server_socket.bind((HOST, PORT))
    server_socket.listen(15)

    main_conn = Conn(server_socket)

    # add server socket object to the list of readable connections
    currentConnections.append(main_conn)

    print("Chat server started on port", str(PORT))

    try:
        while 1:
            sockets = [o.socket for o in currentConnections]

            # get the list sockets which are ready to be read through select
            # 4th arg, time_out  = 0 : poll and never block
            # NOTE(review): with a zero timeout this loop spins continuously;
            # a blocking select would avoid that - confirm before changing.
            ready_to_read, _, _ = select.select(sockets, [], [], 0)

            for sock in ready_to_read:
                currentConn = next(
                    (c for c in currentConnections if c.socket == sock), None)
                if currentConn is None:
                    continue

                # a new connection request recieved
                if currentConn == main_conn:
                    sockfd, addr = server_socket.accept()
                    currentConnections.append(Conn(sockfd, addr))
                    continue

                # a message from a client, not a new connection
                try:
                    # receiving data from the socket.
                    data = sock.recv(RECV_BUFFER)
                    if data:
                        print("data received:", data)
                        _chat_handle_message(server_socket, main_conn,
                                             currentConn, sock,
                                             json.loads(data))
                except Exception:
                    # any failure is treated like a broken connection
                    data = None

                if not data:
                    # at this stage, no data means probably the connection
                    # has been broken
                    _chat_drop(server_socket, currentConn, sock)
    finally:
        server_socket.close()
Beispiel #15
0
                    max_len = len(sentence)
                    longest_sentence = sentence

    start = lines.decode('utf8').find(longest_sentence)
    end = start + max_len
    utf8_lines = lines.decode('utf8')
    pre = utf8_lines[0:start]
    after = utf8_lines[end:]
    return pre, longest_sentence, after

def removeIllegalChar(line):
    """Return line with every [ ] / ' " ( ) ! ? ~ character removed."""
    # Raw string plus one character class: same characters as before, without
    # relying on invalid escape sequences in a normal string literal.
    return re.sub(r'[\[\]/\'"()!?~]', '', line)

es = Elasticsearch()

conn = Conn().getConnection()
cursor = conn.cursor()
upcursor = conn.cursor()
sql = "select id, title, substring_index(content,'相关原创文章,敬请关注',1) from CrawlPage where content not like '%</a>%'"
cursor.execute(sql)
for row in cursor.fetchall():
    id = row[0]
    title = row[1]
    content = row[2]
    title = re.sub('\[|\]|\/|\'|\"|\(|\)|\!|\?|\~|\-','',title)

    try:
        res = es.search(index="app", body={"fields":["title"],"size":1,"query": {"query_string": {"query":title}}})
        for hit in res['hits']['hits']:
            print "process:", id, title
            pre, sentence, after = FindLongestSentence(content)
Beispiel #16
0
import sqlalchemy as db
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

from conn import Conn

# Engine for the URL supplied by Conn; echo=True enables SQL statement logging.
engine = db.create_engine(Conn.get_url(), echo=True)

# Metadata bound to the engine, used as the base metadata of the models.
meta = db.MetaData(engine)
Base = declarative_base(metadata=meta)

# Session factory bound to the same engine.
Session = sessionmaker(bind=engine)
Beispiel #17
0
    http_server = tornado.httpserver.HTTPServer(
        Application(handlers=handlers, debug=False))
    '''
    监听tcp
    启动消息队列
    监听ws端口
    发送接收消息
    '''

    # initialize the TCP connection and listen on the control port
    logging.info("%s : starting at port:%d...", MyTCPServer.__name__,
                 conf.PORT)
    MyTCPServer(CtrlServer, conf.PORT, http_server).run()

    # start the timers
    Conn.on_server_start()

    # start the message queue and begin receiving timer events
    zmq_server.run(Conn.on_receive_timer)

    def server_stop():
        """Stop the server: close the message queue, then stop the IOLoop."""
        logging.info('stopping gate...')
        zmq_server.close()
        loop = ioloop.IOLoop.instance()
        loop.stop()

    def sig_stop(sig, frame):
        """Exit-signal handler: log it and schedule server_stop on the IOLoop."""
        logging.warning('caught signal: %s', sig)
        loop = ioloop.IOLoop.instance()
        loop.add_callback(server_stop)
Beispiel #18
0
reload(sys)
sys.setdefaultencoding("utf-8")
import MySQLdb
from conn import Conn

# Opening of the sitemap document: XML declaration plus the <urlset> root tag.
begin = '''<?xml version="1.0" encoding="UTF-8"?>
<urlset
      xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
      xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
      xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
      http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd">
'''
# Closing tag of the <urlset> root element.
end = '</urlset>'
# Text placed before and after each URL to form one <url><loc>...</loc></url>
# entry (see addUrl).
subbegin = '  <url><loc>'
subend = '</loc></url>'
# Connection obtained from the project's Conn helper.
conn = Conn().getConnection()


def addUrl(sitemap, url):
    """Return sitemap with one more entry for url appended.

    The entry is url wrapped in subbegin/subend and ended with a newline.
    """
    entry = "%s%s%s\n" % (subbegin, url, subend)
    return sitemap + entry


def addStaticUrl(sitemap):
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/')
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/bloglist/1')
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/bloglist/2')
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/bloglist/3')
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/bloglist/4')
    sitemap = addUrl(sitemap, 'http://www.shareditor.com/bloglist/5')
    sitemap = addUrl(sitemap, 'http://favorite.shareditor.com/favorite/')
    sitemap = addUrl(
    start = lines.decode('utf8').find(longest_sentence)
    end = start + max_len
    utf8_lines = lines.decode('utf8')
    pre = utf8_lines[0:start]
    after = utf8_lines[end:]
    return pre, longest_sentence, after


def removeIllegalChar(line):
    """Return line with every [ ] / ' " ( ) ! ? ~ character removed."""
    # Raw string plus one character class: same characters as before, without
    # relying on invalid escape sequences in a normal string literal.
    return re.sub(r'[\[\]/\'"()!?~]', '', line)


es = Elasticsearch()

conn = Conn().getConnection()
cursor = conn.cursor()
upcursor = conn.cursor()
sql = "select id, title, substring_index(content,'相关原创文章,敬请关注',1) from CrawlPage where content not like '%</a>%'"
cursor.execute(sql)
for row in cursor.fetchall():
    id = row[0]
    title = row[1]
    content = row[2]
    title = re.sub('\[|\]|\/|\'|\"|\(|\)|\!|\?|\~|\-', '', title)

    try:
        res = es.search(index="app",
                        body={
                            "fields": ["title"],
                            "size": 1,
Beispiel #20
0
import sys

# Python 2 only: reload(sys) followed by setdefaultencoding makes UTF-8 the
# process-wide default string encoding.
reload(sys)
sys.setdefaultencoding("utf-8")

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
import jieba
from jieba import analyse
import MySQLdb
import numpy as np
from sklearn import metrics
from sklearn.svm import SVC
from conn import Conn

# Module-level connection object shared by the functions below.
conn = Conn().getConnection()


def get_segment(all=False):
    """Fetch article rows from CrawlPage and print a progress line for each.

    Args:
        all: when equal to True, every row is selected; otherwise only rows
            whose ``segment`` column is NULL are selected.

    NOTE(review): the visible body only prints "cutting <title>" per row; the
    actual segmentation step appears to be cut off in this excerpt.
    """
    cursor = conn.cursor()

    if True == all:
        # Fetch the title and content of every article
        sql = "select id, title, content from CrawlPage"
    else:
        # Fetch the title and content of articles that have not been segmented yet
        sql = "select id, title, content from CrawlPage where segment is null"
    cursor.execute(sql)

    for row in cursor.fetchall():
        # row[1] is the `title` column of the SELECT above
        print "cutting %s" % row[1]
Beispiel #21
0
def register():
    """
    Registers a user and creates a model for them.

    Required params:\n
    -action = 'register'\n
    -username\n
    -userpw\n
    -email\n
    -firstname\n
    -lastname\n
    :return: Returns the modelID and 201 for them.

    Error responses: 400 when the body is not JSON or a required parameter
    is missing, 403 for a wrong action or an already-taken username, and 500
    when either the user or the model could not be stored.
    """
    request.get_data()
    if request.json is None:
        return jsonify({
            'status': {
                'error':
                True,
                'message':
                "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }), 400

    # On failure `data` holds the name of the missing parameter instead of
    # the parameter dict.
    chkMissingData, data = getRequiredParameters(request,
                                                 action='',
                                                 username='',
                                                 userpw='',
                                                 email='',
                                                 firstname='',
                                                 lastname='')

    if not chkMissingData:
        return jsonify({
            'status': {
                'message': 'Missing required parameter \'' + data + '\'',
                'error': True
            },
            'data': None
        }), 400

    con = Conn()

    if data['action'] != 'register':
        return jsonify({
            'status': {
                'message':
                'Wrong action. This endpoint is only for registration.',
                'error': True
            },
            'data': None
        }), 403

    username = data["username"]
    password = data["userpw"]  # assume plain password sent over https

    if con.usernameExists(username):
        return jsonify({
            'status': {
                'message': 'Username already exists.',
                'error': True
            },
            'data': None
        }), 403

    userID = con.registerUser(username=username,
                              pwd=password,
                              firstName=data['firstname'],
                              lastName=data['lastname'],
                              email=data['email'],
                              usePwHash=False)

    # Only build and store a model once the user really exists; otherwise a
    # failed registration would still create a model and try to attach it to
    # the bogus user id "-1".
    mid = -1
    if userID != -1:
        model = Classifier(makeNewModel=True)
        mid = con.addNewModel(userID=str(userID), modelName=model.modelName)

    # if error during user creation or model creation
    if userID == -1 or mid == -1:
        return jsonify({
            'status': {
                'message':
                'Server encountered error with user registration. Please try again later',
                'error': True
            },
            'data': None
        }), 500

    # if user registration is ok
    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'mid': mid
        }
    }), 201
Beispiel #22
0
def retrain():
    """
    Used to retrain the model.

    Required params:\n
    -action = 'retrain'\n
    -stoken => the session token \n
    -data => a list of trainingID that will be deleted\n

    :return: A success message and 200.

    Error responses: 400 when the body is not JSON, a required parameter is
    missing or 'data' is not a list; 403 for an invalid session token or a
    wrong action; 500 when the training rows or the model record could not
    be updated.
    """
    request.get_data()
    if request.json is None:
        return jsonify({
            'status': {
                'error':
                True,
                'message':
                "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }), 400

    # On failure `data` holds the name of the missing parameter instead of
    # the parameter dict.
    chkMissingData, data = getRequiredParameters(request,
                                                 stoken='',
                                                 action='',
                                                 data='')

    if not chkMissingData:
        return jsonify({
            'status': {
                'message': 'Missing required parameter \'' + data + '\'',
                'error': True
            },
            'data': None
        }), 400

    con = Conn()
    userID = con.checkToken(data["stoken"])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'retrain':
        return jsonify({
            'status': {
                'message':
                'Wrong action. This endpoint is only for retraining.',
                'error': True
            },
            'data': None
        }), 403

    if not isinstance(data['data'], list):
        return jsonify({
            'status': {
                'message':
                'Bad data. Data in the \'data\' field must be sent as list.',
                'error': True
            },
            'data': None
        }), 400

    # get old model info
    oldModelID, oldModelName = con.getModelInfo(str(userID))

    if not con.removeTraining(oldModelID, data['data']):
        return jsonify({
            'status': {
                'message': 'Internal server encountered. Please try again.',
                'error': True
            },
            'data': None
        }), 500

    availableTraining = con.getAvailableTraining(oldModelID)

    # make a new model and replay every remaining training row into it
    model = Classifier(makeNewModel=True)
    for eachData in availableTraining:
        model.train(text=eachData[2],
                    sentiment=eachData[3],
                    tags=eachData[4],
                    fromDB=True)

    # update db with new model info
    if not con.updateModelInfo(oldModelID, modelName=model.modelName):
        return jsonify({
            'status': {
                'message': 'Internal server error when updating the resource.',
                'error': True
            },
            'data': None
        }), 500

    if not oldModelName.endswith(PICKLE_FILE_EXTENSION):
        oldModelName += PICKLE_FILE_EXTENSION

    oldModelPath = os.path.join(os.path.dirname(__file__),
                                'Models/' + oldModelName)
    try:
        os.remove(oldModelPath)
    except FileNotFoundError:
        # The database already points at the new model, so a missing old
        # file must not turn a successful retrain into a failed request.
        pass

    return jsonify({
        'status': {
            'message': 'Model successfully retrained.',
            'error': False
        },
        'data': None
    }), 200
Beispiel #23
0
def send(conn: Conn, data: bytes, count: int = 3) -> int:
    """Send *data* over *conn*, retrying the unsent tail after a short write.

    ``conn.send`` is called once, and then up to *count* more times with
    whatever part of the payload is still outstanding. Returns the total
    number of bytes reported as sent, which may be less than ``len(data)``
    when the retries run out.
    """
    total = 0
    pending = data
    retries_left = count
    while True:
        written = conn.send(pending)
        total += written
        # Stop once the current chunk went out completely or no retry is left.
        if written == len(pending) or retries_left <= 0:
            return total
        pending = pending[written:]
        retries_left -= 1
Beispiel #24
0
def getTrainingData():
    """
    Returns every stored training row that belongs to the caller's model.

    Required params:\n
    -action = 'getData'\n
    -stoken\n

    :return: The caller's training data and 200; 400 for a non-JSON body or
        a missing parameter; 403 for a bad session token or a wrong action.
    """
    request.get_data()
    if request.json is None:
        wrongHeader = {
            'status': {
                'error':
                True,
                'message':
                "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }
        return jsonify(wrongHeader), 400

    # When the check fails, the second value is the missing parameter's name.
    allPresent, data = getRequiredParameters(request, stoken='', action='')
    if not allPresent:
        missingParam = {
            'status': {
                'message': 'Missing required parameter \'' + data + '\'',
                'error': True
            },
            'data': None
        }
        return jsonify(missingParam), 400

    con = Conn()
    userID = con.checkToken(data["stoken"])
    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'getData':
        wrongAction = {
            'status': {
                'message':
                'Wrong action. This endpoint is only for retrieving training data.',
                'error': True
            },
            'data': None
        }
        return jsonify(wrongAction), 403

    # Only the model id is needed here; the name is unpacked and ignored.
    modelID, _modelName = con.getModelInfo(str(userID))

    # Columns 0, 1, 3, 4 and 5 of each row are exposed; column 2 is skipped.
    formattedTraining = [{
        "id": row[0],
        "rawText": row[1],
        "sentiment": row[3],
        "tags": row[4],
        "dateTrained": row[5]
    } for row in con.getAvailableTraining(modelID)]

    payload = {
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'availableTraining': formattedTraining
        }
    }
    return jsonify(payload), 200
Beispiel #25
0
def listen(address: str, listen: int = 1) -> Conn:
    """Create a new ``Conn``, call its ``listen(address, listen)`` and return it."""
    server = Conn()
    server.listen(address, listen)
    return server
Beispiel #26
0
def getOCRtext():
    """
    Gets extracted text for an image.

    Required params:\n
    -action = 'ocr'\n
    -stoken\n
    -datatype = blob\n
    -data => list (if multiple) or string (if single)\n

    :return: Returns the extracted text and 200.

    Error responses: 400 when the body is not JSON, a required parameter is
    missing, 'data' has the wrong type or a file could not be converted;
    403 for an invalid session token, a wrong action or a wrong datatype.
    Temporary files written for the request are removed before returning,
    including when a conversion fails or OCR raises.
    """
    request.get_data()
    if request.json is None:
        return jsonify({
            'status': {
                'error':
                True,
                'message':
                "Incorrect header type. Header type should be application/json."
            },
            'data': None
        }), 400

    # On failure `data` holds the name of the missing parameter instead of
    # the parameter dict.
    noMissingData, data = getRequiredParameters(request,
                                                stoken='',
                                                datatype='',
                                                data='',
                                                action='')

    if not noMissingData:
        return jsonify({
            'status': {
                'message': 'Missing required parameter \'' + data + '\'',
                'error': True
            },
            'data': None
        }), 400

    con = Conn()
    userID = con.checkToken(data['stoken'])

    if userID == -1:
        return jsonify(INVALID_SESSION_TOKEN), 403

    if data['action'] != 'ocr':
        return jsonify({
            'status': {
                'message':
                'Wrong action. This endpoint is only for getting OCR text.',
                'error': True
            },
            'data': None
        }), 403

    if data['datatype'] != 'blob':
        return jsonify({
            'status': {
                'message': 'Wrong request. This endpoint is only for blobs.',
                'error': True
            },
            'data': None
        }), 403

    payload = data['data']

    if not isinstance(payload, (list, str)):
        return jsonify({
            'status': {
                'message':
                'Wrong data type for \'data\' field. Accepted types are str or list.',
                'error': True
            },
            'data': None
        }), 400

    if isinstance(payload, list):  # if multiple files
        status, files = [], []
        for item in payload:
            filename = str(uuid.uuid4())  # generate temp file name
            iferr, retval = getFileBase64(item, filename, userID)
            status.append(iferr)
            files.append(retval)

        if False in status:  # if any error during converting the file
            # Drop the temp files that were written before the failure so
            # a partially bad batch does not leave them behind.
            for converted, path in zip(status, files):
                if converted:
                    try:
                        os.remove(path)
                    except OSError:
                        # best-effort cleanup; the 400 below is what matters
                        pass
            return jsonify(
                {'status': {
                    'error': True,
                    'message': ERRORFILEEXTEN
                }}), 400

        try:
            ocrtext = [getOcr(eachFile) for eachFile in files]
        finally:
            # remove every temp file even when OCR raises part-way through
            for eachFile in files:
                os.remove(eachFile)

        return jsonify({
            'status': RETURN_SUCCESS_STATUS,
            'data': {
                'ocrtext': ocrtext,
                'type': 'list'
            }
        }), 200

    # single file (payload is a str)
    filename = str(uuid.uuid4())  # generate temp file name
    status, filepath = getFileBase64(payload, filename, userID)

    if not status:
        # if error during converting the file
        return jsonify(
            {'status': {
                'error': True,
                'message': ERRORFILEEXTEN
            }}), 400

    try:
        ocrtext = getOcr(filepath)
    finally:
        os.remove(filepath)  # remove temp file even when OCR raises

    return jsonify({
        'status': RETURN_SUCCESS_STATUS,
        'data': {
            'ocrtext': ocrtext,
            'type': 'str'
        }
    }), 200
Beispiel #27
0
def accept(conn: Conn):
    """Call ``conn.accept()`` and hand back whatever it returns."""
    accepted = conn.accept()
    return accepted
Beispiel #28
0
 def __init__(self):
     """Create a ``User`` and store the result of ``Conn().conn()`` as ``self.engine``."""
     self.user = User()
     conn = Conn()
     # NOTE(review): presumably a database engine/handle -- confirm against Conn.conn()
     self.engine = conn.conn()