def test_check_db():
    """Test that check_db gives back an ERPDB object for each kind of input."""

    # Case 1: no database object supplied (None)
    from_none = check_db(None)
    assert isinstance(from_none, ERPDB)

    # Case 2: an existing ERPDB object is supplied
    existing = ERPDB()
    from_existing = check_db(existing)
    assert isinstance(from_existing, ERPDB)
def save_summary(self, db=None):
    """Write the summary of the scraped ERP paper data out to a JSON file.

    Parameters
    ----------
    db : ERPDB object, optional
        Database object for the project; passed through `check_db` before use.
    """

    db = check_db(db)

    # The file is named after this object's label, under '/summary/' in the words path
    summary_file = db.words_path + '/summary/' + self.label + '.json'

    with open(summary_file, 'w') as outfile:
        json.dump(self.summary, outfile)
def make_wc(freq_dist, n_words, label, disp_fig=True, save_fig=False, db=None):
    """Create and display wordcloud.

    Parameters
    ----------
    freq_dist : frequency distribution
        Word frequencies to plot; handed to `conv_freqs` together with `n_words`.
    n_words : int
        Number of top words to include in the wordcloud.
    label : str
        Name used for the saved figure file (`<label>.svg`).
    disp_fig : boolean, optional
        If False, the figure is closed after it has been saved.
    save_fig : boolean, optional
        Whether to save out the wordcloud.
    db : ERPDB object, optional
        Database object for the project; only consulted when saving.
    """

    # Honour the requested number of words (previously a hard-coded 20 was used)
    wc = create_wc(conv_freqs(freq_dist, n_words))

    plt.figure(figsize=(10, 10))
    plt.imshow(wc)
    plt.axis("off")

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'wc', label + '.svg')

        plt.savefig(s_file, transparent=True)

        # NOTE(review): the close is tied to saving here, i.e. the figure is only
        # closed when it was saved and should not be displayed - confirm intent.
        if not disp_fig:
            plt.close()
def plot_years(year_counts, label, disp_fig=True, save_fig=False, db=None):
    """Plot the number of publications across years.

    Parameters
    ----------
    year_counts : list of (int, int)
        Pairs whose first element is plotted on the 'Year' axis and whose
        second element is plotted on the '# Pubs' axis.
    label : str
        Name used for the saved figure file (`<label>.svg`).
    disp_fig : boolean, optional
        If False, the figure is closed after it has been saved.
    save_fig : boolean, optional
        Whether to save out the figure.
    db : ERPDB object, optional
        Database object for the project; only consulted when saving.
    """

    plt.subplots(figsize=(10, 5))

    # Fixed range of years shown on the x-axis
    first_year, last_year = 1985, 2015

    # Extract x & y data to plot
    x_dat = [y[0] for y in year_counts]
    y_dat = [y[1] for y in year_counts]

    # Add line and points to plot
    plt.plot(x_dat, y_dat)
    plt.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits - fall back to 0 when there is no data, so that an
    # empty input draws an empty axis instead of raising from max()
    y_max = max(y_dat) if y_dat else 0
    plt.xlim([first_year, last_year])
    plt.ylim([0, y_max + 5])

    # Add title & labels
    plt.title('Publication History', fontsize=24, fontweight='bold')
    plt.xlabel('Year', fontsize=18)
    plt.ylabel('# Pubs', fontsize=18)

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'year', label + '.svg')

        plt.savefig(s_file, transparent=True)

        # NOTE(review): the close is tied to saving here, i.e. the figure is only
        # closed when it was saved and should not be displayed - confirm intent.
        if not disp_fig:
            plt.close()
def load_pickle_obj(f_name, db=None):
    """Load a custom object, from a pickle file, for ERP-SCANR project.

    Parameters
    ----------
    f_name : str
        File name of the object to be loaded, without the '.p' extension.
        NOTE: the file looked up is exactly `f_name + '.p'`, so any suffix
        that was added when the file was saved must be included here.
    db : ERPDB object, optional
        Database object for the ERP-SCANR project.

    Returns
    -------
    object
        The unpickled object.

    Raises
    ------
    InconsistentDataError
        If no matching file is found in either the counts or words folder.

    Notes
    -----
    Unpickling can execute arbitrary code - only load files from a trusted source.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    # Get all available files, for Count and Words pickled objects
    counts_objs = os.listdir(db.counts_path)
    words_objs = os.listdir(db.words_path)

    pickle_name = f_name + '.p'

    # Search for object in saved Count files, and set path if found
    if pickle_name in counts_objs:
        load_path = os.path.join(db.counts_path, pickle_name)

    # Search for object in saved Words files, and set path if found
    elif pickle_name in words_objs:
        load_path = os.path.join(db.words_path, pickle_name)

    # Raise an error if the file name is not found
    else:
        raise InconsistentDataError('Can not find requested file name.')

    # Load and return the data, making sure the file handle gets closed
    with open(load_path, 'rb') as load_file:
        return pickle.load(load_file)
def plot_years(year_counts, label, disp_fig=True, save_fig=False, db=None):
    """Plot the number of publications across years.

    Parameters
    ----------
    year_counts : list of (int, int)
        Pairs whose first element is plotted on the 'Year' axis and whose
        second element is plotted on the '# Pubs' axis.
    label : str
        Name used for the saved figure file (`<label>.svg`).
    disp_fig : boolean, optional
        If False, the figure is closed after it has been saved.
    save_fig : boolean, optional
        Whether to save out the figure.
    db : ERPDB object, optional
        Database object for the project; only consulted when saving.
    """

    plt.subplots(figsize=(10, 5))

    # Fixed range of years shown on the x-axis
    first_year, last_year = 1985, 2015

    # Extract x & y data to plot
    x_dat = [y[0] for y in year_counts]
    y_dat = [y[1] for y in year_counts]

    # Add line and points to plot
    plt.plot(x_dat, y_dat)
    plt.plot(x_dat, y_dat, '.', markersize=16)

    # Set plot limits - fall back to 0 when there is no data, so that an
    # empty input draws an empty axis instead of raising from max()
    y_max = max(y_dat) if y_dat else 0
    plt.xlim([first_year, last_year])
    plt.ylim([0, y_max + 5])

    # Add title & labels
    plt.title('Publication History', fontsize=24, fontweight='bold')
    plt.xlabel('Year', fontsize=18)
    plt.ylabel('# Pubs', fontsize=18)

    if save_fig:

        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'year', label + '.svg')

        plt.savefig(s_file, transparent=True)

        # NOTE(review): the close is tied to saving here, i.e. the figure is only
        # closed when it was saved and should not be displayed - confirm intent.
        if not disp_fig:
            plt.close()
def save(self, db=None):
    """Write all attached data out to a json file, one article per line.

    Parameters
    ----------
    db : ERPDB object, optional
        Database object for the project; passed through `check_db` before use.
    """

    db = check_db(db)

    # The file is named after this object's label, under '/raw/' in the words path
    raw_file = db.words_path + '/raw/' + self.label + '.json'

    with open(raw_file, 'w') as outfile:
        for article in self:
            # Each article is dumped as its own JSON document, newline terminated
            json.dump(article, outfile)
            outfile.write('\n')

    # Record in the object's history that it has been saved
    self.update_history('Saved')
def plot_time_assocs(dat, save_fig=False, db=None):
    """Plot ERP association words along a time axis, at their latencies.

    Parameters
    ----------
    dat : list of list of [str, str, int]
        ERP data - [association, P or N, latency]
    save_fig : boolean, optional
        Whether to save out the figure.
    db : ERPDB object, optional
        Database object for the project; only consulted when saving.
    """

    # Plot params: 'P' entries go above the axis, 'N' entries below
    offsets = {'P': 50, 'N': -50}
    rotations = {'P': 45, 'N': -45}

    # Initialize Plot
    fig = plt.figure(figsize=(12, 5))
    fig.suptitle('ERP Correlates Across Time', fontsize=24, fontweight='bold')
    ax = fig.add_subplot(111)

    # Set plot limits
    ax.set_xlim([50, 600])
    ax.set_ylim([-100, 100])

    # Add x-ticks
    plt.xticks([250, 500], ['250 ms', '500 ms'])
    ax.set_yticks([])

    # Set ticks and plot lines
    ax.spines['right'].set_color('none')
    ax.spines['bottom'].set_position('center')
    ax.spines['top'].set_color('none')
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('none')
    ax.spines['left'].set_linewidth(2)
    ax.spines['bottom'].set_linewidth(2)

    # Add data to plot
    for d in dat:

        assoc, polarity, latency = d[0], d[1], d[2]

        # Text takes: [X-pos, Y-pos, word, rotation]
        #   Where X-pos is latency, y-pos & rotation are defaults given +/-
        ax.text(latency, offsets[polarity], assoc,
                rotation=rotations[polarity], fontsize=20)

    # Save out - if requested
    if save_fig:

        # `db` is now a parameter; before, this line read an unassigned local
        db = check_db(db)
        s_file = os.path.join(db.figs_path, 'LatencyAssociations' + '.svg')

        plt.savefig(s_file, transparent=True)
def load(self, db=None):
    """Read raw article data back in from this object's json file.

    Parameters
    ----------
    db : ERPDB object, optional
        Database object for the project; passed through `check_db` before use.
    """

    db = check_db(db)

    # The file is named after this object's label, under '/raw/' in the words path
    raw_file = db.words_path + '/raw/' + self.label + '.json'

    for article in _parse_json_dat(raw_file):

        self.add_id(article['id'])
        self.add_title(article['title'])

        # Journal is stored as a pair, added as two separate arguments
        self.add_journal(article['journal'][0], article['journal'][1])

        self.add_authors(article['authors'])
        self.add_words(article['words'])
        self.add_kws(article['kws'])
        self.add_pub_date([article['year'], article['month']])
        self.add_doi(article['doi'])

        # Count each article as it is added
        self.increment_n_articles()

    # Run the object's check once everything has been loaded
    self.check_results()
def save_pickle_obj(obj, f_name, db=None):
    """Save a custom object from ERP-SCANR as a pickle file.

    Parameters
    ----------
    obj : {Counts() object, Words() object}
        ERP-SCANR custom object to save out.
    f_name : str
        Name to append to saved out file name.
        The file is written as `<f_name>_counts.p` or `<f_name>_words.p`.
    db : ERPDB() object, optional
        Database object for the ERP-SCANR project.

    Raises
    ------
    InconsistentDataError
        If `obj` is neither a Count nor a Words object.

    Notes
    -----
    A 'labels.txt' file, listing `obj.labels` one per line, is (over)written
    in the same folder as the pickle file.
    """

    # Check for database object, initialize if not provided
    db = check_db(db)

    # If it's a Counts object, set path and name
    if isinstance(obj, Count):
        save_name = f_name + '_counts.p'
        save_path = db.counts_path

    # If it's a Words object, set path and name
    elif isinstance(obj, Words):
        save_name = f_name + '_words.p'
        save_path = db.words_path

    # If neither, raise error as object type is unclear
    else:
        raise InconsistentDataError('Object type unclear - can not save.')

    # Save out labels header file
    with open(os.path.join(save_path, 'labels.txt'), 'w') as outfile:
        for label in obj.labels:
            outfile.write("%s\n" % label)

    # Save pickle file - the context manager makes sure the data is
    # flushed to disk and the file handle is closed
    save_file = os.path.join(save_path, save_name)
    with open(save_file, 'wb') as pickle_file:
        pickle.dump(obj, pickle_file)