Example #1
def main(gamename, seed, algo, task_id):
    env = gym.make(gamename)
    print("start running task", task_id)
    t0 = time.time()
    # set seed
    env.seed(seed)
    torch.manual_seed(seed)
    np.random.seed(seed)
    state_dim, action_dim, action_lim = check_env(env)
    replay_buffer = Data(args.buffersize)
    if algo == 'ddpg':
        trainer = DDPGTrainer(state_dim, action_dim, action_lim, replay_buffer)
    elif algo == 'td3':
        trainer = TD3Trainer(state_dim, action_dim, action_lim, replay_buffer)
    else:
        print("error algo")
        return

    frame_count = 0
    timestep_since_eval = 0
    reward_list = []
    evaluations = []
    for episode in range(args.max_episode):
        trainer.init_episode()
        obs = env.reset()
        obs = torch.from_numpy(obs).reshape(1, state_dim).float()
        reward_episode = 0
        actor_loss_l = []
        critic_loss_l = []
        for i in range(args.T):
            if frame_count < args.start_timesteps:
                action = env.action_space.sample()
            else:
                action = trainer.get_exploration_action(obs)
            new_obs, r, done, _ = env.step(action)
            new_obs = torch.from_numpy(new_obs).reshape(1, state_dim).float()
            reward_episode += r
            sequence = [
                obs,
                torch.from_numpy(action).reshape(1, action_dim).float(),
                r,  # store the per-step reward, not the running episode total
                new_obs, 0 if done else 1
            ]
            replay_buffer.push(sequence)
            obs = new_obs
            frame_count += 1
            timestep_since_eval += 1
            if done:
                break
        trainer.optimize(i)
        reward_list.append(reward_episode)

        if timestep_since_eval > args.eval_freq:
            timestep_since_eval %= args.eval_freq
            evaluations.append(evaluate_policy(env, trainer))
            trainer.save_model(gamename, evaluations, seed)

        if frame_count > args.Tmax:
            break
    trainer.save_model(gamename, evaluations, seed)
    return "Over"
Example #2
    def execute(self, p_conn):
        debug_on_event = p_conn.get_debug_on_event()

        data = Data()
        data.set_data("debug_on_event", debug_on_event)

        self.emit_("debug_on_event_update_request", data)
Example #3
def get_movies_with_similar_genres(movie_id: int, n: int = 5,
                                   popularity_bias: bool = False,
                                   user_bias: bool = False,
                                   movies: pd.DataFrame = None):
    # Get all movies and split them into the base movie and the rest

    if n is None:
        n = 5

    # Use the preferred movie df
    if movies is None:
        all_movies = Data.movie_meta()[Column.genres.value]
    else:
        all_movies = movies[Column.genres.value]

    # get the base out of the df and remove it from the rest
    base_genres = eval(all_movies.loc[movie_id])
    all_movies = all_movies.drop(movie_id)

    # count similar genres
    all_movies = all_movies.apply(
        lambda row: count_elements_in_set(row, base_genres)
    )
    # remove all movies which have no genre in common
    filtered_movies_sum = all_movies[all_movies > 0]

    # if user_bias is true
    if user_bias:
        # reduce the candidate set to the n * 10 most genre-similar movies
        top_n_mul_ten = filtered_movies_sum.nlargest(n * 10)
        ratings = Data.ratings()

        # group by movie
        ratings_grouped = ratings.groupby(str(Column.movie_id))
        # calculate mean rating and number of ratings for each movie
        # (select rating to remove first level of column index. before: (rating: (mean, count)), after: (mean, count) )
        measures: pd.DataFrame = ratings_grouped.agg(['mean', 'count'])[str(Column.rating)]

        # merging mean, count and genre sum into one DataFrame
        measures_movies = pd.merge(measures, pd.DataFrame(top_n_mul_ten), left_index=True, right_index=True)

        if popularity_bias:
            # give more weight to the number of ratings (~popularity)
            # by raising the avg ratings to some power (to preserve some notion of good vs. bad ratings)
            # and multiplying the count back in
            # additionally multiply the genre back in
            # to prevent good rated movies with little correlation to the genres
            results = measures_movies.eval('(mean ** 3) * count * genres')
        else:
            # multiply genre to prevent good rated movies with little correlation to the genres
            results = measures_movies.eval('mean * genres')
    else:
        results = filtered_movies_sum

    return results
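
A small synthetic illustration (made-up numbers, simplified column names, not project data) of how the popularity-biased formula `(mean ** 3) * count * genres` trades off rating quality, rating count, and genre overlap:

import pandas as pd

measures_movies = pd.DataFrame({
    'mean':   [4.5, 3.0, 4.0],   # average rating
    'count':  [10, 500, 200],    # number of ratings (~popularity)
    'genres': [3, 3, 2],         # genres shared with the base movie
})
# cubing the mean keeps good-vs-bad separation, while count and genre
# overlap can still outweigh a well-rated but rarely rated movie
print(measures_movies.eval('(mean ** 3) * count * genres'))
# -> 2733.75, 40500.0, 25600.0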
Example #4
    def _check_dbwhere(self, p_curdbwhere):
        # FIXME: documentation

        predbwhere = self.__predbwhere

        if p_curdbwhere != predbwhere:
            data = Data()
            data.set_data("where", p_curdbwhere)

            self.emit_("where_changed", data)

            self.__predbwhere = p_curdbwhere
Example #5
    def _check_dbstack(self, p_curdbstack):
        # FIXME: documentation

        predbstack = self.__predbstack

        if p_curdbstack != predbstack:
            data = Data()
            data.set_data("stack", p_curdbstack)

            self.emit_("stack_changed", data)

            self.__predbstack = p_curdbstack
Example #6
    def execute(self, p_conn):
        # FIXME: re-implement this method using
        #        'p_conn.is_file_in_loadpath'

        funcname = self.__funcname

        if funcname[1]:
            real = "['%s', filemarker(), '%s']" % (funcname[0], funcname[1])
        else:
            real = "'%s'" % funcname[0]

        dbstatus = p_conn.dbstatus(real)
        lines = []

        if dbstatus:
            file_ = dbstatus[0]["file"]

            # This avoids functions defined in the CommandWindow.
            if funcname[1] or file_:
                if not file_:
                    file_ = p_conn.file_in_loadpath("'%s'" % self.__filename)

                if self.__file == file_:
                    lines = dbstatus[0]["lines"]

        data = Data()
        data.set_data("file", self.__file)
        data.set_data("function", funcname[1] if funcname[1] else funcname[0])
        data.set_data("lines", lines)

        self.emit_("breakpoints_update_request", data)
Example #7
    def _check_dbstatus(self, p_curdbstatus):
        # FIXME: documentation

        # NOTE: The breakpoints may change without the signal being emitted.
        #       This is not a problem; there is no fix for it in Octave 3.2.3.

        predbstatus = self.__predbstatus

        if p_curdbstatus != predbstatus:
            data = Data()
            data.set_data("breakpoints", p_curdbstatus)

            self.emit_("breakpoints_changed", data)

            self.__predbstatus = p_curdbstatus
Example #8
def get_year_relevance(movie_id: int, n: int = 0):
    release_years = Data.movie_meta()[Column.release_year.value]
    movie_year = release_years.loc[movie_id]
    release_years = release_years.subtract(movie_year)
    release_years = release_years.abs()
    release_years = 1 - release_years.div(release_years.max())
    return release_years.drop(movie_id)
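
A quick numeric check (synthetic years) of the scaling above: the base movie's own year maps to 1.0 and the farthest year maps to 0.0:

import pandas as pd

years = pd.Series({1: 1999, 2: 2001, 3: 1979})
diff = (years - years.loc[1]).abs()   # 0, 2, 20
relevance = 1 - diff / diff.max()     # 1.0, 0.9, 0.0
print(relevance.drop(1))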
Example #9
    def build_index(cls):
        if cls.ix is None:
            cls.init()

        # the writer is committed explicitly below (iw.commit)
        iw = cls.ix.writer()
        for movie_id, movie in Data.movie_meta().iterrows():
            # extract fields
            fields: Dict = {
                'movie_id': movie_id,
                'title': movie[Column.title.value],
                # 'tagline': movie[Column.tagline.value],
                # 'summary': movie[Column.summary.value],
                # 'keywords': movie[Column.keywords.value],
                # 'popularity': movie[Column.num_ratings.value],
                # 'genres': movie[Column.genres.value],
            }
            # filter empty values (inserting fails for np.nan values)
            fields = {
                key: val
                for key, val in fields.items()
                if val is not None and val is not np.nan and val != ''
            }

            # insert into index
            iw.update_document(**fields)

        iw.commit(optimize=True)
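
The writer calls above match Whoosh's API. A hedged sketch of the kind of index `cls.init()` would have to create for this to work; the directory name and schema fields are assumptions:

import os
from whoosh import index
from whoosh.fields import Schema, ID, TEXT

schema = Schema(
    movie_id=ID(stored=True, unique=True),  # unique key, needed by update_document
    title=TEXT(stored=True),
)
os.makedirs('indexdir', exist_ok=True)
ix = index.create_in('indexdir', schema)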
Example #10
def get_imdb_id(movielens_id: int) -> int:
    movies = Data.movie_meta()
    if movielens_id not in movies.index:
        raise MovieNotFoundException()

    movie = movies.loc[movielens_id]
    return movie[Column.imdb_id.value]
Example #11
def get_movie_meta_for(movie_ids: List[int]) -> List[Dict]:
    # if single movie, pack into list
    if isinstance(movie_ids, int):
        movie_ids = [movie_ids]

    movie_ids = [m for m in movie_ids if m is not None]  # materialize: .loc needs a list-like, not a lazy filter object

    meta: pd.DataFrame = Data.movie_meta()

    try:
        # filter metadata
        meta = meta.loc[movie_ids]
    except KeyError as e:
        raise MovieNotFoundException(e.args)

    # fetch metadata for the movies, convert to dictionary
    # orient='records' results in [{'col1': 'val1', 'col2': 'val2'}, {'col1': 'val1', ..}]
    meta_dict: List[Dict] = meta.to_dict(orient='records')

    for item in meta_dict:
        for col in [
                Column.actors, Column.genres, Column.keywords, Column.directors
        ]:
            if not pd.isnull(item[col.value]):
                item[col.value] = eval(item[col.value])

    add_poster_urls(meta_dict)

    return meta_dict
Example #12
def uuid_str():
    '''
    Return a [Base58 encoded][1] UUID that can be used to opaquely identify
    applications, sessions, and users to external clients

    [1]: https://en.wikipedia.org/wiki/Base58
    '''
    return Data(uuid4().bytes).stringWithEncoding(Base58)
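
`Data(uuid4().bytes).stringWithEncoding(Base58)` relies on an external framework. A dependency-free sketch of the same idea (simplified: leading zero bytes are not encoded as the customary '1' characters):

from uuid import uuid4

ALPHABET = '123456789ABCDEFGHJKLMNPQRSTUVWXYZabcdefghijkmnopqrstuvwxyz'

def uuid_str_sketch():
    n = int.from_bytes(uuid4().bytes, 'big')
    chars = []
    while n:
        n, rem = divmod(n, 58)
        chars.append(ALPHABET[rem])
    return ''.join(reversed(chars)) or ALPHABET[0]

print(uuid_str_sketch())  # e.g. a ~22-character opaque identifier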
Example #13
    def get_similarities_for(cls, movie_id: int, colname: str):
        # get similarity matrix (calculate if necessary)
        sim_matrix = cls.calculate_similarities(colname)

        # get absolute index of movie
        index = Data.movie_meta().index.get_loc(movie_id)

        # get similarities for this movie
        # use .toarray() to convert from sparse matrix
        # use [0] to convert "matrix" with only one row to one-dimensional array
        similarities = sim_matrix[index].toarray()[0]

        # put into pandas Series
        # use index=... to apply original index
        series = pd.Series(index=Data.movie_meta().index, data=similarities)

        return series.drop(movie_id)
Example #14
    def __call__(self, movie_id: int, n: int = 5):
        meta = Data.movie_meta()
        # movies that belong to the same collection as the base movie
        collection = meta[get_collection_mask(movie_id, meta)].index.values

        # over-fetch so enough results remain after dropping the collection
        results: pd.Series = self.method(movie_id, n + 10)

        results = results.drop(collection, errors='ignore')

        return results
Example #15
def get_normalized_popularity():
    # used for popularity bias
    popularity = Data.movie_meta()[Column.num_ratings.value]
    # apply a root to reduce linearity
    # (if movie A has double the ratings of movie B, its popularity should only be slightly higher)
    popularity **= (1 / 10)
    # normalize
    popularity /= popularity.max()
    return popularity
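
A standalone check (synthetic counts) of why the tenth root is used: doubling the rating count raises popularity only slightly:

import pandas as pd

counts = pd.Series({'A': 100.0, 'B': 200.0, 'C': 10000.0})
popularity = counts ** (1 / 10)   # ~1.58, ~1.70, ~2.51
popularity /= popularity.max()
print(popularity)  # B has 2x A's ratings but only ~7% more popularity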
Example #16
def avg_rating_for_user(user_id: int) -> float:
    """ Calculates the average score for ratings from a specified user. """
    ratings: pd.Series = Data.ratings_as_series()

    # check if user_id exists and raise exception if it does not
    if user_id not in ratings:
        raise UserNotFoundException()

    # select ratings from specified user_id
    user_ratings: pd.Series = ratings.loc[user_id]
    # calculate average using integrated function
    return user_ratings.mean()
Example #17
    def calculate_similarities(cls,
                               colname: str,
                               overwrite_existing: bool = False):
        if colname not in cls.similarity_matrices or overwrite_existing:
            # calculate tf_idf for column
            tfidf_matrix = cls.tf_idf.fit_transform(
                Data.movie_meta()[colname].fillna(''))
            # calculate similarities between movies
            # use dense_output=False (results in sparse matrix) to reduce memory usage
            cls.similarity_matrices[colname] = linear_kernel(
                tfidf_matrix, tfidf_matrix, dense_output=False)

        return cls.similarity_matrices[colname]
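
A self-contained sketch of the same technique on toy documents; the vectorizer settings are assumptions, not the class's configuration. Because TfidfVectorizer L2-normalizes rows by default, linear_kernel produces cosine similarities here:

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

docs = ['action adventure sci-fi', 'action thriller', 'romance drama']
tfidf_matrix = TfidfVectorizer().fit_transform(docs)
# dense_output=False keeps the result sparse, as in the method above
sim = linear_kernel(tfidf_matrix, tfidf_matrix, dense_output=False)
print(sim.toarray()[0])  # similarity of the first "movie" to all three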
Example #18
def get_movielens_id(tmdb_id: int = None, imdb_id: int = None) -> int:
    movies: pd.DataFrame = Data.movie_meta()

    if tmdb_id is not None:
        movie = movies.query(f'{Column.tmdb_id.value} == {tmdb_id}')
    elif imdb_id is not None:
        movie = movies.query(f'{Column.imdb_id.value} == {imdb_id}')
    else:
        return None

    if movie.empty:
        return None

    return movie.index[0]
Example #19
def tmdb_reference(movie_id: int, n: int = 5):
    movie = Data.movie_meta().loc[movie_id]
    # get list from string representation
    similar_tmdb = eval(movie[Column.tmdb_similar.value])

    # get movielens id from tmdb_id
    similar = map(lambda tmdb_id: get_movielens_id(tmdb_id=tmdb_id),
                  similar_tmdb)

    # return with artificial decreasing score
    return pd.Series({
        item: -index
        for index, item in enumerate(similar) if item is not None
    })
Example #20
def recommend_movies(movie_id: int, n: int = 5,
                     filter_below_avg_ratings: bool = False,
                     popularity_bias: bool = False) -> List[int]:
    ratings = Data.ratings()

    # first get the ratings for the base movie
    ratings_of_base_movie = ratings.query('movie_id == %s' % movie_id)

    # check if there are reviews for this movie
    if ratings_of_base_movie.empty:
        raise MissingDataException('no ratings for movie_id %s' % movie_id)

    if filter_below_avg_ratings:
        # of those, select the above average ratings
        avg_rating = ratings_of_base_movie['rating'].mean()
        # query is actually faster than the python subscription syntax ( users[users['rating'] >= avg] )
        ratings_of_base_movie = ratings_of_base_movie.query('rating >= %f' %
                                                            avg_rating)

    # to get ratings from all the users that have rated/liked the base movie,
    # perform a (left outer) join on all the ratings on user_id
    relevant_movies = ratings_of_base_movie.join(ratings,
                                                 on='user_id',
                                                 lsuffix='_L')
    # remove the columns that were duplicated as result of the join
    relevant_movies = relevant_movies[['movie_id', 'rating']]
    # remove the base movie from the results
    relevant_movies = relevant_movies.query('movie_id != %s' % movie_id)

    if relevant_movies.empty:
        raise MissingDataException(
            'no other ratings from users that rated movie_id %s' % movie_id)

    # group by movie
    relevant_movie_groups = relevant_movies.groupby('movie_id')
    # calculate mean rating and number of ratings for each movie
    # (select rating to remove first level of column index. before: (rating: (mean, count)), after: (mean, count) )
    measures: pd.DataFrame = relevant_movie_groups.agg(['mean',
                                                        'count'])['rating']

    if popularity_bias:
        # give more weight to the number of ratings (~popularity)
        # by raising the avg ratings to some power (to preserve some notion of good vs. bad ratings)
        # and multiplying the count back in
        results = measures.eval('(mean ** 3) * count')
    else:
        results = measures['mean']

    return results
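
A minimal illustration (synthetic ratings) of the `agg(['mean', 'count'])['rating']` step, which flattens the two-level column index:

import pandas as pd

ratings = pd.DataFrame({
    'movie_id': [1, 1, 2, 2, 2],
    'rating':   [4.0, 5.0, 3.0, 4.0, 5.0],
})
# before selecting: columns are (rating, mean), (rating, count)
measures = ratings.groupby('movie_id').agg(['mean', 'count'])['rating']
print(measures)  # one row per movie, columns: mean, count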
Example #21
def _recommend_movies(movie_id: int, n: int, method: Method) -> List[Dict]:
    if movie_id not in Data.movie_meta().index:
        raise MovieNotFoundException

    # start with the movie itself
    movies: List[int] = [movie_id]

    # calculate similarities
    scores: Series = method(movie_id)
    # and filter out any movies that were recommended recently
    scores = History.filter(scores)

    if method == Method.reference or method == Method.sequels:
        n = 20

    # movies = [base_movie, ...recommendations]
    movies.extend(scores.nlargest(n).index)
    # remember what was recommended so it can be filtered out next time
    History.append(movies)

    return get_movie_meta_for(movies)
Example #22
    def search(cls, query_text: str, n: int, add_posters: bool = True):
        # this method applies a popularity bias to the search results;
        # since the results get re-sorted, more of them are fetched than
        # needed, so popular entries with rather low scores can be recovered
        results = cls._search(query_text, n + 25)

        # encapsulate in pandas.Series for further operations
        scores = pd.Series(results, name='score')
        # perform a (right outer) join to connect the search results to the metadata
        df = Data.movie_meta().join(scores, how='right')
        # calculate the weighted score by raising it to some power
        # in order for the popularity to not overpower the score completely
        # and multiply with the number of ratings (the popularity)
        df.eval(f'weighted = score**16 * {Column.num_ratings.value}',
                inplace=True)

        # extract the n best results and export as list
        movie_ids = list(df.nlargest(n, 'weighted').index)
        # fetch metadata
        meta = get_movie_meta_for(movie_ids)

        return meta
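
A synthetic illustration of the re-weighting ('num_ratings' stands in for whatever `Column.num_ratings.value` resolves to): score**16 decays fast, so popularity only overtakes when scores are close:

import pandas as pd

df = pd.DataFrame({'score': [0.95, 0.90], 'num_ratings': [100, 5000]})
df.eval('weighted = score**16 * num_ratings', inplace=True)
print(df)
# 0.95**16 * 100  ~  44
# 0.90**16 * 5000 ~ 927  -> the much more popular, slightly weaker match wins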
Example #23
def get_collection(movie_id: int,
                   df: pd.DataFrame = None,
                   include_base_movie: bool = True,
                   start_from_base_movie: bool = False,
                   wrap_to_start: bool = False) -> pd.DataFrame:
    """
    Get movies from a collection.

    :param movie_id: a movie that is in a collection
    :param df: the pandas DataFrame to search
    :param include_base_movie: whether to include movie_id itself in the result
    :param start_from_base_movie: whether to split the result and start at movie_id
    :param wrap_to_start: if start_from_base_movie: at the end of the collection, wrap over to the start and include the prequels
    :return: a DataFrame containing the movies in the collection
    """
    if df is None:
        df = Data.movie_meta()

    # select movies that are in collection
    m = df[get_collection_mask(movie_id, df)]
    # sort by release year
    m = m.sort_values(by=Column.release_date.value)

    if not include_base_movie:
        m = m.drop(movie_id)

    if start_from_base_movie:
        # split dataframe at base_movie; positional slicing, since the
        # index is sorted by release date rather than by id
        pos = m.index.get_loc(movie_id)
        sequels = m.iloc[pos:]
        prequels = m.iloc[:pos]

        if wrap_to_start:
            # reverse order and join again
            m = pd.concat([sequels, prequels])
        else:
            # just return the movies starting with the base movie
            m = sequels

    return m
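
A standalone sketch (synthetic data) of the split-and-wrap step, using the same positional slicing as above:

import pandas as pd

m = pd.DataFrame({'year': [1999, 2003, 2021]}, index=[101, 102, 103])
pos = m.index.get_loc(102)               # base movie
sequels, prequels = m.iloc[pos:], m.iloc[:pos]
print(list(pd.concat([sequels, prequels]).index))  # -> [102, 103, 101]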
Example #24
    def __init__(self, p_mwindow):
        """
            p_mwindow: un 'MainWindow'.

            Retorna:   una 'Connection'.

            Crea una nueva 'Connection'.
        """
        threading.Thread.__init__(self)
        gobject.GObject.__init__(self)

        self.__mwindow = p_mwindow
        self.__continue = True
        self.__term = Terminal(self)

        # The commands the user sends
        # to octave are stored here.
        self.__tail = []

        # The commands to execute when
        # octave is ready are stored here.
        self.__tail_prio = TailWithPriority()

        self.__dbstatus_code = self.__get_dbstatus_code()
        self.__dbstack_code = self.__get_dbstack_code()

        self.__prestate = None
        self.__predbstack = None
        self.__predbwhere = None
        self.__predbstatus = None
        self.__predir = None
        # self.__prevars = None  # to emit a "vars_changed"

        flag = Data()
        key = "key"
        self.connect("changed", self.__on_changed, flag, key)
        self.connect("stack_changed", self.__on_stack_changed, flag, key)
        self.connect("where_changed", self.__on_where_changed, flag, key)
Example #25
def actors_as_lists():
    # since eval (convert string representation to object) is costly time-wise, cache results
    return Data.movie_meta()[Column.actors.value].map(eval)
Example #26
    def execute(self, p_conn):
        if "is patch for Octave-3.2.3":
            dbwhere = p_conn.dbwhere() if p_conn.dbstack()["frames"] else {}
            #dbwhere = p_conn.dbwhere()  # Puede hacer un beep!
            file_ = self.__file
            positions = []
            current = False

            if dbwhere and dbwhere["file"] == file_:
                pos = (dbwhere["line"], dbwhere["column"])

                if None not in pos:
                    positions.append(pos)
                    current = True

            data = Data()
            data.set_data("positions", positions)
            data.set_data("current", current)

            self.emit_("stack_update_request", data)
            return

        file_ = self.__file
        omit = self.__omit
        omit = str(omit) if omit else ""
        dbstack = p_conn.dbstack(omit)

        positions = []
        frames = dbstack["frames"]
        current = bool(frames and frames[0]["file"] == file_)
        for frame in frames:
            if frame["file"] == file_:
                positions.append((frame["line"], frame["column"]))

        data = Data()
        data.set_data("positions", positions)
        data.set_data("current", current)

        self.emit_("stack_update_request", data)
Example #27
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
    datefmt='%a, %d %b %Y %H:%M:%S',
)

if len(sys.argv) >= 2:
    dataIdx = '{:0>2}'.format(sys.argv[1])
else:
    dataIdx = 'CoNLL'

trainPath = 'data/normal/en_train_{}.txt'.format(dataIdx)
testPath = 'data/normal/en_test_CoNLL.txt'

data = Data(inputPathList=[trainPath], testPath=testPath)
return_data = data.loadCoNLL(trainPath, loadFeatures=True)

split_data = train_test_split(*return_data, test_size=0.1, random_state=0)
# train_test_split returns a (train, test) pair per input array:
# even slots are training splits, odd slots are validation splits,
# and the last two entries are the label splits
X_train = split_data[:-2:2]
X_val = split_data[1:-2:2]
y_train, y_val = split_data[-2:]

modelWrapper = BiLSTMCRF(data)
model = modelWrapper.buildModel(feature2idx=data.feature2idx)

history = metricHistory(X_val, y_val, saveDir=dataIdx)
history.set_model(model)
model.fit(X_train,
          y_train,
          epochs=50,
Example #28
from util.plot import Plot, multiplot
from util.stats import cdf_fit_func
import numpy as np
from util.data import Data

# d = Data.load("knn_results_[yelp-mnist].pkl")
d = Data.load("prediction_results.pkl")
names = d.names.copy()
names = [names[0], names[6], names[1]] + names[2:6] + names[7:]
d.reorder([names[0], names[6], names[1]])
d.sort()
d["Errors"] = ([float(v) if (v <= 5) else 5.0 for v in e] for e in d["Errors"])
d["Mean Error"] = (sum(e) / len(e) for e in d["Errors"])
d["Mean Squared Error"] = (sum(v**2 for v in e) / len(e) for e in d["Errors"])
d["Error Variance"] = (float(np.var(e)) for e in d["Errors"])

d._max_display = 1000
print(d)
all_data = d

# Get the unique dimensions and algorithms.
dims = sorted(set(all_data["Dimension"]))
algs = sorted(set(all_data["Algorithm"]))
data_sets = sorted(set(all_data["Data"]))

for ds in data_sets:
    for alg in algs:
        d = all_data[all_data["Data"] == ds]
        d = d[d["Algorithm"] == alg]
        min_index = int(np.argmin(d["Mean Error"]))
        m = d[min_index, "Method"]
Example #29
def directors_as_lists():
    return Data.movie_meta()[Column.directors.value].map(eval)
Example #30
def get_genre_as_lists():
    return Data.movie_meta()[Column.genres.value].map(eval)