Example #1
0
def test_check_db():
    """Verify check_db gives back an ERPDB for both None and an existing ERPDB."""

    # Passing None should produce an ERPDB instance
    from_none = check_db(None)
    assert isinstance(from_none, ERPDB)

    # Passing an existing ERPDB object should also produce an ERPDB instance
    existing = ERPDB()
    from_existing = check_db(existing)
    assert isinstance(from_existing, ERPDB)
Example #2
0
    def save_summary(self, db=None):
        """Write the summary of the scraped ERP paper data to a json file.

        The file is placed in the 'summary' folder under `db.words_path`, and
        is named after this object's label.

        Parameters
        ----------
        db : object, optional
            Database object providing `words_path`; passed through `check_db`
            before use.
        """

        db = check_db(db)
        summary_file = db.words_path + '/summary/' + self.label + '.json'

        with open(summary_file, 'w') as json_out:
            json.dump(self.summary, json_out)
Example #3
0
def make_wc(freq_dist, n_words, label, disp_fig=True, save_fig=False, db=None):
    """Create and display wordcloud.

    Parameters
    ----------
    freq_dist : object
        Word frequency data, handed to `conv_freqs` to prepare the wordcloud input.
    n_words : int
        Number of top words to include in the wordcloud.
    label : str
        Label used as the file name (without extension) of the saved figure.
    disp_fig : boolean, optional
        If False, the figure is closed after it is saved.
        Only consulted when `save_fig` is True.
    save_fig : boolean
        Whether to save out the wordcloud.
    db : object, optional
        Database object providing `figs_path`; passed through `check_db`
        before use. Only used when `save_fig` is True.
    """

    # Honor the requested number of words rather than a fixed count
    # NOTE(review): assumes the second argument of conv_freqs is the number
    #   of top words to keep - confirm against its definition
    wc = create_wc(conv_freqs(freq_dist, n_words))

    plt.figure(figsize=(10, 10))
    plt.imshow(wc)
    plt.axis("off")

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'wc', label + '.svg')

        plt.savefig(s_file, transparent=True)
        if not disp_fig:
            plt.close()
Example #4
0
def make_wc(freq_dist, n_words, label, disp_fig=True, save_fig=False, db=None):
    """Create and display wordcloud.

    Parameters
    ----------
    freq_dist : object
        Word frequency data, handed to `conv_freqs` to prepare the wordcloud input.
    n_words : int
        Number of top words to include in the wordcloud.
    label : str
        Label used as the file name (without extension) of the saved figure.
    disp_fig : boolean, optional
        If False, the figure is closed after it is saved.
        Only consulted when `save_fig` is True.
    save_fig : boolean
        Whether to save out the wordcloud.
    db : object, optional
        Database object providing `figs_path`; passed through `check_db`
        before use. Only used when `save_fig` is True.
    """

    # Honor the requested number of words rather than a fixed count
    # NOTE(review): assumes the second argument of conv_freqs is the number
    #   of top words to keep - confirm against its definition
    wc = create_wc(conv_freqs(freq_dist, n_words))

    plt.figure(figsize=(10, 10))
    plt.imshow(wc)
    plt.axis("off")

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'wc', label + '.svg')

        plt.savefig(s_file, transparent=True)
        if not disp_fig:
            plt.close()
Example #5
0
def plot_years(year_counts, label, disp_fig=True, save_fig=False, db=None):
    """Plot publications across years histogram.

    Parameters
    ----------
    year_counts : list of [int, int]
        Data to plot; the first item of each entry is used as the x value
        (year) and the second as the y value (number of publications).
        May be empty, in which case an empty set of axes is drawn.
    label : str
        Label used as the file name (without extension) of the saved figure.
    disp_fig : boolean, optional
        If False, the figure is closed after it is saved.
        Only consulted when `save_fig` is True.
    save_fig : boolean, optional
        Whether to save out the figure.
    db : object, optional
        Database object providing `figs_path`; passed through `check_db`
        before use. Only used when `save_fig` is True.
    """

    plt.subplots(figsize=(10, 5))

    # Fixed range of years shown on the x-axis
    first_year, last_year = 1985, 2015

    # Extract x & y data to plot
    x_dat = [y[0] for y in year_counts]
    y_dat = [y[1] for y in year_counts]

    # Add line and points to plot
    plt.plot(x_dat, y_dat)
    plt.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits, leaving headroom above the largest count
    #  Falls back to a zero peak when there is no data, so max() can not fail
    peak = max(y_dat) if y_dat else 0
    plt.xlim([first_year, last_year])
    plt.ylim([0, peak + 5])

    # Add title & labels
    plt.title('Publication History', fontsize=24, fontweight='bold')
    plt.xlabel('Year', fontsize=18)
    plt.ylabel('# Pubs', fontsize=18)

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'year', label + '.svg')

        plt.savefig(s_file, transparent=True)
        if not disp_fig:
            plt.close()
Example #6
0
def load_pickle_obj(f_name, db=None):
    """Load a custom object, from a pickle file, for ERP-SCANR project.

    Parameters
    ----------
    f_name : str
        File name of the object to be loaded, without the '.p' extension.
    db : ERPDB object, optional
        Database object for the ERP-SCANR project.

    Returns
    -------
    object
        The object unpickled from the file that was found.

    Raises
    ------
    InconsistentDataError
        If the file name is found in neither the counts nor the words folder.

    Notes
    -----
    Unpickling can execute arbitrary code - only load files from a trusted source.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    pickle_name = f_name + '.p'

    # Get all available files, for Count and Words pickled objects
    counts_objs = os.listdir(db.counts_path)
    words_objs = os.listdir(db.words_path)

    # Search for object in saved Count files, and set path if found
    if pickle_name in counts_objs:
        load_path = os.path.join(db.counts_path, pickle_name)

    # Search for object in saved Words files, and set path if found
    elif pickle_name in words_objs:
        load_path = os.path.join(db.words_path, pickle_name)

    # Raise an error if the file name is not found
    else:
        raise InconsistentDataError('Can not find requested file name.')

    # Load and return the data, making sure the file handle gets closed
    with open(load_path, 'rb') as pickle_file:
        return pickle.load(pickle_file)
Example #7
0
def plot_years(year_counts, label, disp_fig=True, save_fig=False, db=None):
    """Plot publications across years histogram.

    Parameters
    ----------
    year_counts : list of [int, int]
        Data to plot; the first item of each entry is used as the x value
        (year) and the second as the y value (number of publications).
        May be empty, in which case an empty set of axes is drawn.
    label : str
        Label used as the file name (without extension) of the saved figure.
    disp_fig : boolean, optional
        If False, the figure is closed after it is saved.
        Only consulted when `save_fig` is True.
    save_fig : boolean, optional
        Whether to save out the figure.
    db : object, optional
        Database object providing `figs_path`; passed through `check_db`
        before use. Only used when `save_fig` is True.
    """

    plt.subplots(figsize=(10, 5))

    # Fixed range of years shown on the x-axis
    first_year, last_year = 1985, 2015

    # Extract x & y data to plot
    x_dat = [y[0] for y in year_counts]
    y_dat = [y[1] for y in year_counts]

    # Add line and points to plot
    plt.plot(x_dat, y_dat)
    plt.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits, leaving headroom above the largest count
    #  Falls back to a zero peak when there is no data, so max() can not fail
    peak = max(y_dat) if y_dat else 0
    plt.xlim([first_year, last_year])
    plt.ylim([0, peak + 5])

    # Add title & labels
    plt.title('Publication History', fontsize=24, fontweight='bold')
    plt.xlabel('Year', fontsize=18)
    plt.ylabel('# Pubs', fontsize=18)

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'year', label + '.svg')

        plt.savefig(s_file, transparent=True)
        if not disp_fig:
            plt.close()
Example #8
0
    def save(self, db=None):
        """Write all attached data out to a json file, one entry per line.

        The file is placed in the 'raw' folder under `db.words_path`, and is
        named after this object's label.

        Parameters
        ----------
        db : object, optional
            Database object providing `words_path`; passed through `check_db`
            before use.
        """

        db = check_db(db)
        raw_file = db.words_path + '/raw/' + self.label + '.json'

        # Each item from iterating over this object is dumped on its own line
        with open(raw_file, 'w') as json_out:
            for article in self:
                json.dump(article, json_out)
                json_out.write('\n')

        # Record the save in the history
        self.update_history('Saved')
Example #9
0
def plot_time_assocs(dat, save_fig=False, db=None):
    """Plot ERP associations as text labels positioned along a time axis.

    Parameters
    ----------
    dat : list of list of [str, str, int]
        ERP data - [association, P or N, latency]
    save_fig : boolean, optional
        Whether to save out the figure.
    db : object, optional
        Database object providing `figs_path`; passed through `check_db`
        before use. Only used when `save_fig` is True.
    """

    # Plot params - vertical offset and text rotation, keyed by P or N
    offsets = {'P': 50, 'N': -50}
    rotations = {'P': 45, 'N': -45}

    # Initialize Plot
    fig = plt.figure(figsize=(12, 5))
    fig.suptitle('ERP Correlates Across Time', fontsize=24, fontweight='bold')
    ax = fig.add_subplot(111)

    # Set plot limits
    ax.set_xlim([50, 600])
    ax.set_ylim([-100, 100])

    # Add x-ticks
    plt.xticks([250, 500], ['250 ms', '500 ms'])
    ax.set_yticks([])

    # Set ticks and plot lines
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('center')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.spines['left'].set_linewidth(2)
    ax.spines['bottom'].set_linewidth(2)

    # Add data to plot
    for d in dat:

        assoc, polarity, latency = d[0], d[1], d[2]

        # Text takes: [X-pos, Y-pos, word, rotation]
        #  Where X-pos is latency, y-pos & rotation are defaults given +/-
        ax.text(latency, offsets[polarity], assoc,
                rotation=rotations[polarity], fontsize=20)

    # Save out - if requested
    if save_fig:

        # db has to be a parameter: assigning to it here makes it a local name,
        #  so without one this line would fail before check_db is ever called
        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'LatencyAssociations.svg')

        plt.savefig(s_file, transparent=True)
Example #10
0
    def load(self, db=None):
        """Read raw data in from a json file, and attach it to this object.

        The file is read from the 'raw' folder under `db.words_path`, and is
        named after this object's label.

        Parameters
        ----------
        db : object, optional
            Database object providing `words_path`; passed through `check_db`
            before use.
        """

        db = check_db(db)
        raw_file = db.words_path + '/raw/' + self.label + '.json'

        # Add each parsed entry, field by field, counting it as one article
        for entry in _parse_json_dat(raw_file):
            self.add_id(entry['id'])
            self.add_title(entry['title'])
            self.add_journal(entry['journal'][0], entry['journal'][1])
            self.add_authors(entry['authors'])
            self.add_words(entry['words'])
            self.add_kws(entry['kws'])
            self.add_pub_date([entry['year'], entry['month']])
            self.add_doi(entry['doi'])
            self.increment_n_articles()

        self.check_results()
Example #11
0
def save_pickle_obj(obj, f_name, db=None):
    """Save a custom object from ERP-SCANR as a pickle file.

    A 'labels.txt' file, listing the labels of the object one per line,
    is also written to the same folder as the pickle file.

    Parameters
    ----------
    obj : {Counts() object, Words() object}
        ERP-SCANR custom object to save out.
    f_name : str
        Name to append to saved out file name.
    db : ERPDB() object, optional
        Database object for the ERP-SCANR project.

    Raises
    ------
    InconsistentDataError
        If the object is neither a Count nor a Words object.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    # If it's a Counts object, set path and name
    if isinstance(obj, Count):
        save_name = f_name + '_counts.p'
        save_path = db.counts_path

    # If it's a Words object, set path and name
    elif isinstance(obj, Words):
        save_name = f_name + '_words.p'
        save_path = db.words_path

    # If neither, raise error as object type is unclear
    else:
        raise InconsistentDataError('Object type unclear - can not save.')

    # Save out labels header file
    with open(os.path.join(save_path, 'labels.txt'), 'w') as outfile:
        for label in obj.labels:
            outfile.write("%s\n" % label)

    # Save pickle file, closing the handle so the data is flushed to disk
    save_file = os.path.join(save_path, save_name)
    with open(save_file, 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)