Code example #1
0
File: ebay_service.py  Project: Kirill5k/ebay-app
class EbayService:
    """Fetches recent ebay phone listings, normalises their titles with the seq2seq model and stores them."""
    logger = Logger.of('EbayService')
    client = EbayClient()
    predictor = Seq2SeqPredictor.from_file(Config.get_filepath('predictor-model'), Config.get_filepath('predictor-weights'))
    embeddings = WordEmbeddings.from_file(Config.get_filepath('word2vec'))

    @classmethod
    def get_latest_phones(cls) -> List[EbayPhone]:
        """Fetch the newest listings, attach a formatted title to each one and persist them all."""
        listings = cls.client.get_latest_phones()
        for listing in listings:
            listing.formatted_title = cls.__get_formatted_title(listing)
            cls.__save(listing)
        return listings

    @classmethod
    def __get_formatted_title(cls, phone: EbayPhone):
        """Run a listing's title through the model, returning UNKNOWN when anything fails."""
        try:
            return cls.__format_title(phone.title)
        except Exception as error:
            cls.logger.error(f'error processing phone "{phone.title}": {error}')
            return WordEmbeddings.UNKNOWN

    @classmethod
    def __format_title(cls, ebay_title: str) -> str:
        """Predict a normalised title and strip the placeholder tokens from the result."""
        encoded = cls.embeddings.sentences_to_indices([ebay_title])
        predicted_ohs = cls.predictor.predict(encoded)[0]
        sentence = cls.embeddings.ohs_to_sentence(predicted_ohs)
        for token in (WordEmbeddings.UNKNOWN, WordEmbeddings.EMPTY):
            sentence = sentence.replace(token, '')
        return sentence.strip()

    @classmethod
    def __save(cls, phone):
        """Persist a phone; failures are logged rather than raised."""
        try:
            phone.save()
        except Exception as error:
            cls.logger.error(f'unable to save: {error}')
Code example #2
0
class CexService:
    """Finds CEX phone matches, preferring recent DB entries over live API calls."""
    logger = Logger.of('CexService')
    client = CexClient()

    @classmethod
    def find_match(cls, query: str) -> List[CexPhone]:
        """Return matches for the query, hitting the CEX API only on a DB miss."""
        cached = cls.__query_db(query)
        if len(cached) > 0:
            return cached
        return cls.__query_client(query)

    @classmethod
    def __query_db(cls, query):
        """Look up phones stored for this exact query within the last week."""
        week_ago = Date().minus_days(7).as_date()
        return CexPhone.objects(query__string=query,
                                query__date__gte=week_ago).all()

    @classmethod
    def __query_client(cls, query):
        """Query the CEX API directly and persist every returned phone."""
        found = cls.client.find_phone(query)
        for_each(cls.__save, found)
        return found

    @classmethod
    def __save(cls, phone):
        """Persist a phone; failures are logged rather than raised."""
        try:
            phone.save()
        except Exception as error:
            cls.logger.error(f'unable to save: {error}')
Code example #3
0
File: phone_service.py  Project: Kirill5k/ebay-app
class PhoneService:
    """Glues ebay fetching, CEX price lookup and notifications together."""
    logger = Logger.of('PhoneService')

    # A listing is only worth flagging when it costs at most 90% of the CEX price.
    PRICE_THRESHOLD = 0.9

    ebay_service = EbayService()
    cex_service = CexService()
    notification_service = NotificationService()

    @classmethod
    def fetch_latest_phones(cls):
        """Pull the newest ebay listings and run a price check on each of them."""
        latest = cls.ebay_service.get_latest_phones()
        cls.logger.info(f'found {len(latest)} phones')
        for listing in latest:
            cls.check_price(listing)

    @classmethod
    def check_price(cls, ebay_phone: EbayPhone):
        """Send a notification when a recognised, trusted listing undercuts the CEX price."""
        cls.logger.info(f'{ebay_phone.title} (£{ebay_phone.price})')
        cls.logger.info(f' |--> {ebay_phone.formatted_title}')
        if not (ebay_phone.is_recognized and ebay_phone.has_trusted_seller):
            return
        matches = cls.cex_service.find_match(ebay_phone.formatted_title)
        cex_price = average_cash_price(matches)
        undervalued = cex_price > 0 and cex_price * cls.PRICE_THRESHOLD >= ebay_phone.price
        if undervalued:
            cls.notification_service.send_notification(ebay_phone, cex_price)
Code example #4
0
class DataSet:
    """Loads a CSV training file and turns it into padded X index arrays and one-hot y targets."""
    logger = Logger.of('DataSet')

    def __init__(self,
                 embeddings: WordEmbeddings,
                 data_file,
                 x_label='title',
                 y_labels=None):
        """
        :param embeddings: WordEmbeddings used to index and one-hot-encode text
        :param data_file: path to a CSV file readable by pandas
        :param x_label: name of the column holding the input sentences
        :param y_labels: columns joined (in order) into the target sentence;
                         defaults to ['brand']. The previous mutable list
                         default was replaced with a None sentinel to avoid the
                         shared-mutable-default pitfall.
        """
        if y_labels is None:
            y_labels = ['brand']
        self.data = pd.read_csv(data_file)
        self.embeddings = embeddings
        self.vocab_size = self.embeddings.size
        self.__prepare_x(x_label)
        self.__prepare_y(y_labels)
        self.logger.info('DataSet created')

    def __prepare_x(self, x_label):
        """Index the input sentences and pad them all to the longest sentence."""
        self.logger.info('preparing X')
        self.X_raw = self.data[x_label].values
        self.X_max_len = max_len(self.X_raw)
        indexed_sentences = self.embeddings.sentences_to_indices(self.X_raw)
        self.X_indexed = pad_sequences(indexed_sentences,
                                       self.X_max_len,
                                       dtype='int32')

    def __prepare_y(self, y_labels):
        """Build target sentences from the label columns and one-hot-encode them."""
        self.logger.info('preparing y')

        def create_label(row):
            # A row whose model column is unknown is labelled UNKNOWN as a whole.
            if 'model' in y_labels and row['model'] == WordEmbeddings.UNKNOWN:
                return WordEmbeddings.UNKNOWN
            # Otherwise join every label column that does not contain the UNKNOWN token.
            return ' '.join([
                row[label] for label in y_labels
                if WordEmbeddings.UNKNOWN not in row[label]
            ])

        self.y_raw = [create_label(row) for index, row in self.data.iterrows()]
        self.y_max_len = max_len(self.y_raw)
        y_oh = self.embeddings.sentences_to_oh(self.y_raw)
        # Pad in 10 chunks, then stitch back together — presumably to limit
        # peak memory on large datasets; TODO confirm.
        y_oh = [
            pad_sequences(y_oh_part,
                          maxlen=self.y_max_len,
                          padding='post',
                          value=self.embeddings.get_oh('EMP'))
            for y_oh_part in np.array_split(y_oh, 10)
        ]
        self.y_oh = np.concatenate(y_oh)

    def get_all(self):
        """Return the full (X, y) arrays."""
        return self.X_indexed, self.y_oh

    def get_train_test_data(self, test_size=0.2):
        '''
        Split into train/test sets with a fixed random seed for reproducibility.

        :return: X_train, X_test, y_train, y_test
        '''
        return train_test_split(self.X_indexed,
                                self.y_oh,
                                test_size=test_size,
                                random_state=48)
Code example #5
0
class EbayClient:
    """Client for the ebay OAuth and Browse APIs, searching recently listed GB phones."""
    client_id: str
    client_secret: str
    access_token: AccessToken = None
    logger = Logger.of('EbayClient')

    def __init__(self):
        self.client_id = Config.ebay['client_id']
        self.client_secret = Config.ebay['client_secret']

    @retry(times=5, wait=10)
    def __get_access_token(self) -> str:
        """Request a client-credentials OAuth token and return the raw token string.

        Return annotation fixed: the method returns the 'access_token' string,
        not the whole response dict.
        """
        url = 'https://api.ebay.com/identity/v1/oauth2/token'
        auth = HTTPBasicAuth(self.client_id, self.client_secret)
        headers = {'Content-Type': 'application/x-www-form-urlencoded'}
        payload = {
            'grant_type': 'client_credentials',
            'scope': 'https://api.ebay.com/oauth/api_scope'
        }
        response = requests.post(url=url,
                                 auth=auth,
                                 headers=headers,
                                 data=payload)
        # A malformed/error response raises KeyError here, triggering @retry.
        return response.json()['access_token']

    def __update_token(self) -> None:
        """Fetch a fresh token when none is cached or the cached one has expired."""
        if self.access_token is None or self.access_token.is_expired():
            self.logger.info('updating token')
            token = self.__get_access_token()
            self.access_token = AccessToken(token)

    @retry(times=5, wait=10)
    def __search(self, query, start_time) -> list:
        """Search GB phone listings (category 9355) created after start_time.

        Return annotation fixed: returns the 'itemSummaries' list, not a dict.
        """
        # Conditions 1000..5000 span new through used/refurbished grades;
        # fixed price only, GB delivery/location, £10-£350.
        filters = 'conditionIds:{1000|1500|2000|2500|3000|4000|5000}'
        filters += ',buyingOptions:{FIXED_PRICE}'
        filters += ',deliveryCountry:GB'
        filters += f',itemStartDate:[{start_time}]'  # f-string for consistency with the rest of the file
        filters += ',price:[10..350]'
        filters += ',priceCurrency:GBP'
        filters += ',itemLocationCountry:GB'

        url = 'https://api.ebay.com/buy/browse/v1/item_summary/search'
        headers = {
            'authorization': f'Bearer {self.access_token.token}',
            'x-ebay-c-marketplace-id': 'EBAY_GB'
        }
        params = {'q': query, 'category_ids': '9355', 'filter': filters}

        result = requests.get(url=url, params=params, headers=headers)
        return result.json().get('itemSummaries', [])

    def get_latest_phones(self, minutes=10) -> list:
        """Return phones listed within the last `minutes`, mapped to EbayPhone objects."""
        self.__update_token()
        phones = self.__search(
            query='phone', start_time=Date().minus_minutes(minutes).as_iso())
        return list(map(EbayPhoneMapper.map, phones))
Code example #6
0
class NotificationService:
    """Sends a webhook notification about an underpriced ebay listing."""
    logger = Logger.of('NotificationService')
    webhooks_client = WebhooksClient()

    @classmethod
    def send_notification(cls, ebay_phone: EbayPhone, cex_price: float):
        """Format and dispatch a notification comparing ebay and CEX prices.

        :param ebay_phone: the matched ebay listing
        :param cex_price: average CEX cash price. Annotation fixed from `str`:
                          callers pass the numeric result of a price average
                          that they compare with `>`.
        """
        title = f'{ebay_phone.title}<br>--{ebay_phone.formatted_title}<br>'
        price = f'<i>Ebay price: {ebay_phone.price} / Cex price: {cex_price}</i>'
        url = ebay_phone.url
        cls.logger.info('Sending event')
        cls.logger.info(f'|--> {title}')
        cls.logger.info(f'|--> {price}')
        cls.logger.info(f'|--> {url}')
        cls.webhooks_client.send_notification(title, price, url)
Code example #7
0
class ModelPredictor:
    """Stacked-LSTM classifier built on top of pre-trained word2vec embeddings."""
    logger = Logger.of('ModelPredictor')

    def __init__(self, w2v_model, input_shape=(30, ), output_size=4367):
        """Build and compile the classification model from the word2vec vectors."""
        embeddings = self.__embeddings_layer(w2v_model)
        self.model = self.__build_model(input_shape, embeddings, output_size)

    @staticmethod
    def __build_model(input_shape, embeddings_layer, output_size):
        """Assemble and compile the LSTM classifier network."""
        sentence_indices = Input(input_shape, dtype='int32')
        x = embeddings_layer(sentence_indices)
        x = Dropout(0.5)(LSTM(256, return_sequences=True)(x))
        x = Dropout(0.5)(LSTM(256, return_sequences=False)(x))
        x = Dropout(0.3)(Dense(512)(x))
        x = Activation('softmax')(Dense(output_size)(x))
        network = Model(inputs=sentence_indices, outputs=x)
        network.compile(loss='categorical_crossentropy',
                        optimizer='adam',
                        metrics=['accuracy'])
        return network

    @staticmethod
    def __embeddings_layer(w2v_model):
        """Keras embedding layer derived directly from the word2vec model."""
        return w2v_model.wv.get_keras_embedding()

    def train(self, X_train, y_train):
        """Fit the model (fixed 2000 epochs, batch size 32, shuffled)."""
        self.model.fit(X_train,
                       y_train,
                       epochs=2000,
                       batch_size=32,
                       shuffle=True)

    def test(self, X_test, y_test):
        """Evaluate on held-out data and log loss and accuracy."""
        loss, acc = self.model.evaluate(X_test, y_test)
        self.logger.info()
        self.logger.info(f'Test loss = {loss}')
        self.logger.info(f'Test accuracy = {acc}')
Code example #8
0
File: cex_client.py  Project: Kirill5k/ebay-app
class CexClient:
    """Client for the CEX (webuy) predictive-search API."""
    logger = Logger.of('CexClient')

    @retry(times=5, wait=10, default_response=[])
    def __search(self, query):
        """Query the CEX predictive-search endpoint and return the raw result list.

        An unacknowledged response raises AssertionError, which @retry catches;
        after the final attempt @retry yields the [] default.
        """
        url = 'https://wss2.cex.uk.webuy.io/v3/boxes/predictivesearch'
        params = {'q': query}
        result = requests.get(url=url, params=params)
        # Fallback must use the 'ack' key (was the typo 'akc') so a missing
        # 'response' fails the assert below instead of raising KeyError.
        response = result.json().get('response', {'ack': 'Failure'})
        assert response[
            'ack'] == 'Success', 'Failed to make get request to CEX'
        self.logger.debug(f'cex response: {response}')
        data = response.get('data')
        results = data.get('results', []) if data is not None else []
        # 'results' may be present but null — normalise to an empty list.
        return results if results is not None else []

    def find_phone(self, query):
        """Search CEX for a phone and map raw results to CexPhone objects."""
        self.logger.debug(f'cex query: {query}')
        results = self.__search(query)
        return list(
            map(lambda result: CexPhoneMapper.map(result, query), results))
Code example #9
0
File: data_processing.py  Project: Kirill5k/ebay-app
from domain.ebay import EbayPhone
from domain.phone import PhoneDetails
import pandas as pd
import numpy as np
from utils.text_utils import match_word, tokenize, update_vocabulary
from utils.logging import Logger
from nlp.embeddings import WordEmbeddings
from nlp.training import DataSet
from config import Config


logger = Logger.of('DataPreparation')

def create_embeddings():
    """Train word embeddings from the training titles (padded with EMP markers) and save them."""
    raw_titles = pd.read_csv(Config.get_filepath('train-data'))['title'].tolist()
    padded_titles = ['EMP ' + title + ' EMP' for title in raw_titles]
    WordEmbeddings.from_sentences(padded_titles).save(Config.get_filepath('word2vec'))


def clean_phones():
    """Reset stored phone details and normalise special characters in stored titles."""
    logger.info('cleaning phones')
    EbayPhone.objects().update(details=PhoneDetails())
    # Same replacement order as before: '*' and '/' become spaces, '+' becomes ' plus '.
    for char, substitute in (('*', ' '), ('/', ' '), ('+', ' plus ')):
        for phone in EbayPhone.objects(title__contains=char):
            phone.update(title=phone.title.replace(char, substitute))

Code example #10
0
from functools import wraps
from time import sleep

from utils.logging import Logger


logger = Logger.of('Retry')


def retry(times=5, wait=10, default_response=None):
    """Decorator retrying a callable, sleeping between attempts.

    Returns the first successful result; when every attempt fails, returns
    ``default_response`` (or ``{}`` if no default was given).

    :param times: maximum number of attempts
    :param wait: seconds to sleep between attempts
    :param default_response: value returned after the final failure
    """
    def decorator(http_call):
        @wraps(http_call)  # preserve the wrapped callable's name/docstring
        def wrapper(*original_args, **original_kwargs):
            for count in range(times):
                try:
                    return http_call(*original_args, **original_kwargs)
                except Exception as error:
                    logger.error(f'{error}, Retry {count+1}')
                    # Don't waste a sleep after the final failed attempt.
                    if count + 1 < times:
                        sleep(wait)
            return default_response if default_response is not None else {}
        return wrapper
    return decorator
Code example #11
0
class Seq2SeqPredictor:
    """Keras sequence-to-sequence model that maps ebay titles to normalised phone specs."""
    logger = Logger.of('Seq2SeqPredictor')

    def __init__(self, model):
        self.model = model
        self.__compile()
        self.logger.info('Seq2SeqPredictor created')

    @classmethod
    def new(cls, word_embeddings, output_shape):
        """Build a fresh encoder/decoder network sized for (output length, vocab size)."""
        cls.logger.info('creating new Seq2SeqPredictor model')
        target_len, target_vocab = output_shape

        network = Sequential()
        network.add(word_embeddings.keras_embeddings_layer())
        network.add(LSTM(512))
        network.add(Dropout(0.5))
        # Repeat the encoded sentence once per output timestep for the decoder.
        network.add(RepeatVector(target_len))
        network.add(LSTM(256, return_sequences=True))
        network.add(Dropout(0.5))
        network.add(TimeDistributed(Dense(1024)))
        network.add(Dropout(0.3))
        network.add(TimeDistributed(Dense(target_vocab)))
        network.add(Activation('softmax'))
        return Seq2SeqPredictor(network)

    @classmethod
    def from_file(cls, model_file, weights_file):
        """Restore a model's architecture from JSON and its weights from disk."""
        cls.logger.info(
            f'loading Seq2SeqPredictor model from {model_file} and weights from {weights_file}'
        )
        with open(model_file, 'r') as architecture:
            restored = model_from_json(architecture.read())
        restored.load_weights(weights_file)
        return Seq2SeqPredictor(restored)

    def __compile(self):
        """Compile with categorical cross-entropy, matching the softmax output layer."""
        self.logger.info('compiling Seq2SeqPredictor model')
        self.model.compile(loss='categorical_crossentropy',
                           optimizer='adam',
                           metrics=['accuracy'])

    def summary(self):
        """Print the Keras model summary."""
        self.model.summary()

    def train(self, X_train, y_train, epochs=50, batch_size=32, shuffle=True):
        """Fit the model on the training data."""
        self.model.fit(X_train,
                       y_train,
                       epochs=epochs,
                       batch_size=batch_size,
                       shuffle=shuffle)

    def test(self, X_test, y_test):
        """Evaluate on held-out data and log loss and accuracy."""
        loss, acc = self.model.evaluate(X_test, y_test)
        self.logger.info()
        self.logger.info(f'test loss = {loss}')
        self.logger.info(f'test accuracy = {acc}')

    def predict(self, X):
        """Return raw per-timestep (one-hot-like) predictions for X."""
        return self.model.predict(X)

    def save(self, model_file, weights_file):
        """Write the architecture JSON and the weights to their respective files."""
        self.logger.info(
            f'saving model to file {model_file} and weights to file {weights_file}'
        )
        with open(model_file, 'w') as file:
            file.write(self.model.to_json())
        self.model.save_weights(weights_file)
Code example #12
0
File: app.py  Project: Kirill5k/ebay-app
from time import sleep
from utils.logging import Logger
from services.phone_service import PhoneService

logger = Logger.of('MAIN')

if __name__ == '__main__':
    # Poll forever: each cycle fetches the latest ebay listings and price-checks them.
    while True:
        PhoneService.fetch_latest_phones()
        logger.info()  # blank log line separating polling cycles
        sleep(560)  # ~9.3 minutes between polls; NOTE(review): 560s looks arbitrary — confirm intended interval
Code example #13
0
from nlp.embeddings import WordEmbeddings
from nlp.Seq2SeqPredictor import Seq2SeqPredictor
from nlp.training import DataSet
from utils.logging import Logger
import pandas as pd
from config import Config

logger = Logger.of('TextProcessing')
embeddings = WordEmbeddings.from_file(Config.get_filepath('word2vec'))

# Build the dataset: titles are the inputs, the joined spec columns the targets.
dataset = DataSet(embeddings,
                  Config.get_filepath('train-data'),
                  y_labels=['brand', 'model', 'memory', 'color', 'network'])
x_max_len, y_max_len, vocab_size = dataset.X_max_len, dataset.y_max_len, dataset.vocab_size
logger.info(f'max len: {x_max_len}/{y_max_len}, labels count {vocab_size}')
X_train, X_test, y_train, y_test = dataset.get_train_test_data(test_size=0.1)

# Train a fresh seq2seq model, persist architecture + weights, then evaluate.
# predictor = Seq2SeqPredictor.from_file(Config.get_filepath('predictor-model'), Config.get_filepath('predictor-weights'))
predictor = Seq2SeqPredictor.new(embeddings, (y_max_len, vocab_size))
predictor.summary()
predictor.train(X_train, y_train, epochs=40)
predictor.save(Config.get_filepath('predictor-model'),
               Config.get_filepath('predictor-weights'))
predictor.test(X_test, y_test)

# Decode a sample of the test set to spot-check the predictions.
test = X_test[0:4000]
expected = y_test[0:4000]
results = predictor.predict(test)

for (inp, exp, res) in zip(test, expected, results):
    r = embeddings.ohs_to_sentence(res)  # NOTE(review): r is never used — presumably meant to be logged or compared; confirm
Code example #14
0
File: embeddings.py  Project: Kirill5k/ebay-app
class WordEmbeddings:
    """Wrapper around a gensim Word2Vec model with index and one-hot encoding helpers."""
    logger = Logger.of('WordEmbeddings')
    # Sentinel tokens used throughout the training data and predictor output.
    EMPTY = 'EMP'
    UNKNOWN = 'unknown'

    def __init__(self, model):
        self.model = model
        # +1 reserves a slot beyond the vocabulary for the one-hot vectors.
        self.size = len(model.wv.vocab) + 1
        # Lazy cache of word -> one-hot vector, filled by get_oh.
        self.oh_dict = {}
        self.logger.info('WordEmbeddings created')

    @classmethod
    def from_file(cls, filename):
        """Load a previously saved Word2Vec model from filename."""
        # Log the actual filename (the message previously contained a redacted
        # "(unknown)" placeholder inside an f-string with no interpolation).
        cls.logger.info(f'loading WordEmbeddings from file {filename}')
        model = Word2Vec.load(filename)
        return WordEmbeddings(model)

    @classmethod
    def from_sentences(cls, sentences, size=25, window=5, min_count=1):
        """Train a new Word2Vec model from raw sentences."""
        cls.logger.info('creating new WordEmbeddings from text')
        processes_sentences = tokenize(sentences)
        model = Word2Vec(processes_sentences,
                         size=size,
                         window=window,
                         min_count=min_count)
        return WordEmbeddings(model)

    def sentences_to_indices(self, sentences):
        """Convert sentences to per-word vocabulary index sequences."""
        processes_sentences = tokenize(sentences)
        return np.array([[self.get_index(word) for word in sentence]
                         for sentence in processes_sentences])

    def sentences_to_oh(self, sentences):
        """Convert sentences to per-word one-hot vector sequences."""
        processes_sentences = tokenize(sentences)
        return np.array([[self.get_oh(word) for word in sentence]
                         for sentence in processes_sentences])

    def info(self):
        """Log the vocabulary size."""
        self.logger.info(f'number of word vectors: {self.size}')

    def save(self, filename):
        """Normalise the vectors in place and persist the model to filename."""
        # Log the actual filename (same "(unknown)" placeholder fix as from_file).
        self.logger.info(f'saving word embeddings to {filename}')
        self.model.init_sims(replace=True)
        self.model.save(filename)

    def keras_embeddings_layer(self):
        """Build a trainable Keras Embedding layer initialised from the word2vec weights."""
        vocab_len = len(self.model.wv.vocab)
        emb_dim = self.model.wv.vector_size
        emb_matrix = self.model.wv.syn0
        embedding_layer = Embedding(vocab_len, emb_dim, trainable=True)
        embedding_layer.build((None, ))
        embedding_layer.set_weights([emb_matrix])
        return embedding_layer

    def get_index(self, word):
        """Return the vocabulary index of word, falling back to the UNKNOWN token."""
        try:
            return self.model.wv.vocab[word].index
        # Only mask missing words; other failures (e.g. a broken model attribute)
        # should propagate instead of being silently swallowed.
        except KeyError:
            self.logger.error(f'unknown word {word}')
            return self.model.wv.vocab[WordEmbeddings.UNKNOWN].index

    def get_word(self, index):
        """Return the word at the given vocabulary index."""
        assert index < self.size, 'index is greater than vocab size'
        return self.model.wv.index2word[index]

    def get_vector(self, word):
        """Return the embedding vector for word."""
        return self.model.wv[word]

    def get_oh(self, word):
        """Return (and cache) the one-hot vector for word."""
        if word not in self.oh_dict:
            index = self.get_index(word)
            self.oh_dict[word] = create_oh_vector(index, self.size)
        return self.oh_dict[word]

    def get_word_for_oh(self, oh_encoding):
        """Decode a single one-hot (or softmax) vector back into a word."""
        assert oh_encoding.shape[
            0] == self.size, f'must have a size of {self.size}'
        index = np.argmax(oh_encoding)
        return self.get_word(index)

    def ohs_to_sentence(self, ohs):
        """Decode a 2-D array of one-hot vectors into a space-joined sentence."""
        assert len(ohs.shape) == 2, 'must be an array of ohs'
        words = [self.get_word_for_oh(oh) for oh in ohs]
        return ' '.join(words)

    def indexes_to_sentence(self, indexes):
        """Decode a sequence of vocabulary indices into a space-joined sentence."""
        words = [self.get_word(index) for index in indexes]
        return ' '.join(words)