def mpi_target_dict(rank, mpi_shape, pbc_axes):
    """Map each face direction to the rank of the neighbouring process.

    Args:
        rank: Rank whose neighbours are looked up; converted to a grid
            coordinate with ``my_coord``.
        mpi_shape: Three integers ``(mx, my, mz)`` describing the process grid.
        pbc_axes: String of axis letters (``'x'``, ``'y'``, ``'z'``) that wrap
            around periodically.

    Returns:
        A dict keyed by ``'x-'``, ``'x+'``, ``'y-'``, ``'y+'``, ``'z-'`` and
        ``'z+'``. Each value is the rank of the neighbour in that direction,
        or ``None`` when there is no neighbour.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)

    mx, my, _ = mpi_shape
    mycoord = my_coord(rank, mpi_shape)

    def neighbor_rank(axis_index, new_val):
        # Rank of the coordinate obtained by replacing one component of
        # mycoord.  Written as a nested function because tuple-parameter
        # lambdas (``lambda (i, j, k): ...``) are not valid on Python 3.
        coord = list(mycoord)
        coord[axis_index] = new_val
        i, j, k = coord
        return i + j * mx + k * mx * my

    targets = {
        'x-': None, 'x+': None,
        'y-': None, 'y+': None,
        'z-': None, 'z+': None,
    }

    for axis_index, axis in enumerate(('x', 'y', 'z')):
        val = mycoord[axis_index]
        ms = mpi_shape[axis_index]
        # A periodic axis only wraps when there is more than one process
        # along it; otherwise the process would be its own neighbour.
        wraps = axis in pbc_axes and ms != 1

        if val > 0:
            targets['%s-' % axis] = neighbor_rank(axis_index, val - 1)
        elif val == 0 and wraps:
            targets['%s-' % axis] = neighbor_rank(axis_index, ms - 1)

        if val < ms - 1:
            targets['%s+' % axis] = neighbor_rank(axis_index, val + 1)
        elif val == ms - 1 and wraps:
            targets['%s+' % axis] = neighbor_rank(axis_index, 0)

    return targets
def sentiment_analysis(load_model, label_type, embs_convert_type, label_type_folder, target_data_folder, save_folder):
    """Obtain an LSTM model and return its predictions on the target data.

    Args:
        load_model: When truthy, a saved model is loaded from ``save_folder``;
            otherwise a model is trained on the source data and evaluated on
            the source test split first.
        label_type: ``'tonality'`` or ``'toxicity'``.
        embs_convert_type: ``'mean'`` or ``'length_64'``.
        label_type_folder: Folder passed to ``load_source_data``.
        target_data_folder: Folder passed to ``load_target_data``.
        save_folder: Folder used for saving / loading the model.

    Returns:
        The value returned by ``predict`` for the target data.
    """
    check_type(label_type, types_list=['tonality', 'toxicity'], type_name='label')
    check_type(embs_convert_type, types_list=['mean', 'length_64'], type_name='embeddings convert')

    # Every loader / trainer below takes the same two selectors.
    selectors = {'label_type': label_type, 'convert_type': embs_convert_type}

    x_target, y_target = load_target_data(data_folder=target_data_folder, **selectors)

    if load_model:
        model = load_lstm(folder=save_folder, **selectors)
    else:
        source_split = load_source_data(label_data_folder=label_type_folder, **selectors)
        x_source, x_source_test, y_source, y_source_test = source_split
        model = train_lstm(x_source=x_source,
                           y_source=y_source,
                           save_folder=save_folder,
                           epochs=5,
                           **selectors)
        # Report on the held-out source split; the result is not used further.
        predict(model=model, x=x_source_test, y=y_source_test, title='Source')

    return predict(model=model, x=x_target, y=y_target, title='Target')
def mpi_target_dict(rank, mpi_shape, pbc_axes):
    """Map each face direction to the rank of the neighbouring process.

    Args:
        rank: Rank whose neighbours are looked up; converted to a grid
            coordinate with ``my_coord``.
        mpi_shape: Three integers ``(mx, my, mz)`` describing the process grid.
        pbc_axes: String of axis letters (``'x'``, ``'y'``, ``'z'``) that wrap
            around periodically.

    Returns:
        A dict keyed by ``'x-'``, ``'x+'``, ``'y-'``, ``'y+'``, ``'z-'`` and
        ``'z+'``. Each value is the rank of the neighbour in that direction,
        or ``None`` when there is no neighbour.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)

    mx, my, _ = mpi_shape
    mycoord = my_coord(rank, mpi_shape)

    def neighbor_rank(axis_index, new_val):
        # Rank of the coordinate obtained by replacing one component of
        # mycoord.  Written as a nested function because tuple-parameter
        # lambdas (``lambda (i, j, k): ...``) are not valid on Python 3.
        coord = list(mycoord)
        coord[axis_index] = new_val
        i, j, k = coord
        return i + j * mx + k * mx * my

    targets = {
        'x-': None, 'x+': None,
        'y-': None, 'y+': None,
        'z-': None, 'z+': None,
    }

    for axis_index, axis in enumerate(('x', 'y', 'z')):
        val = mycoord[axis_index]
        ms = mpi_shape[axis_index]
        # A periodic axis only wraps when there is more than one process
        # along it; otherwise the process would be its own neighbour.
        wraps = axis in pbc_axes and ms != 1

        if val > 0:
            targets['%s-' % axis] = neighbor_rank(axis_index, val - 1)
        elif val == 0 and wraps:
            targets['%s-' % axis] = neighbor_rank(axis_index, ms - 1)

        if val < ms - 1:
            targets['%s+' % axis] = neighbor_rank(axis_index, val + 1)
        elif val == ms - 1 and wraps:
            targets['%s+' % axis] = neighbor_rank(axis_index, 0)

    return targets
def macro_replace_list(pt0, pt1):
    """Return the macro replacement strings for the region from pt0 to pt1.

    This is used to generate the cuda kernel from the template.

    Args:
        pt0: Three integers ``(x0, y0, z0)``.
        pt1: Three integers ``(x1, y1, z1)``.

    Returns:
        ``[nmax, xid, yid, zid]`` as strings.  ``nmax`` is the number of
        points in the inclusive box spanned by the two points.  Each id is
        the fixed coordinate when pt0 and pt1 agree on that axis, otherwise
        an index expression in terms of ``gid`` offset by the pt0 component.

    NOTE(review): offsets always use the pt0 component, so this presumably
    expects pt0 <= pt1 on every axis -- confirm against callers.
    """
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1
    starts = (x0, y0, z0)
    ends = (x1, y1, z1)

    # Inclusive extent along each axis.
    extents = [abs(end - start) + 1 for start, end in zip(starts, ends)]
    nmax = extents[0] * extents[1] * extents[2]

    # Axes on which the two points differ, in x, y, z order.
    moving = [axis for axis in range(3) if starts[axis] != ends[axis]]

    ids = list(starts)
    if len(moving) == 1:
        only = moving[0]
        ids[only] = '(gid + %d)' % starts[only]
    elif len(moving) == 2:
        # The later axis varies fastest; its extent is the divisor/modulus.
        slow, fast = moving
        ids[slow] = '(gid/%d + %d)' % (extents[fast], starts[slow])
        ids[fast] = '(gid%%%d + %d)' % (extents[fast], starts[fast])
    elif len(moving) == 3:
        ids[0] = '(gid/%d + %d)' % (extents[1] * extents[2], starts[0])
        ids[1] = '((gid/%d)%%%d + %d)' % (extents[2], extents[1], starts[1])
        ids[2] = '(gid%%%d + %d)' % (extents[2], starts[2])

    return [str(nmax)] + [str(value) for value in ids]
def macro_replace_list(pt0, pt1):
    """Return the macro replacement strings for the region from pt0 to pt1.

    This is used to generate the opencl kernel from the template.

    Args:
        pt0: Three integers ``(x0, y0, z0)``.
        pt1: Three integers ``(x1, y1, z1)``.

    Returns:
        ``[nmax, xid, yid, zid]`` as strings.  ``nmax`` is the number of
        points in the inclusive box spanned by the two points.  Each id is
        the fixed coordinate when pt0 and pt1 agree on that axis, otherwise
        an index expression in terms of ``gid`` offset by the pt0 component.

    NOTE(review): offsets always use the pt0 component, so this presumably
    expects pt0 <= pt1 on every axis -- confirm against callers.
    """
    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1
    starts = (x0, y0, z0)
    ends = (x1, y1, z1)

    # Inclusive extent along each axis.
    extents = [abs(end - start) + 1 for start, end in zip(starts, ends)]
    nmax = extents[0] * extents[1] * extents[2]

    # Axes on which the two points differ, in x, y, z order.
    moving = [axis for axis in range(3) if starts[axis] != ends[axis]]

    ids = list(starts)
    if len(moving) == 1:
        only = moving[0]
        ids[only] = '(gid + %d)' % starts[only]
    elif len(moving) == 2:
        # The later axis varies fastest; its extent is the divisor/modulus.
        slow, fast = moving
        ids[slow] = '(gid/%d + %d)' % (extents[fast], starts[slow])
        ids[fast] = '(gid%%%d + %d)' % (extents[fast], starts[fast])
    elif len(moving) == 3:
        ids[0] = '(gid/%d + %d)' % (extents[1] * extents[2], starts[0])
        ids[1] = '((gid/%d)%%%d + %d)' % (extents[2], extents[1], starts[1])
        ids[2] = '(gid%%%d + %d)' % (extents[2], starts[2])

    return [str(nmax)] + [str(value) for value in ids]
def accum_sub_ns_dict(mpi_shape, ndev, dnx_list, ny_list, nz_list):
    """Build cumulative-sum offset arrays for the x, y, z and per-device x sizes.

    ``dnx_list`` is consumed in consecutive groups of ``ndev`` entries, one
    group per index along the first ``mpi_shape`` entry (``mx``).

    Args:
        mpi_shape: Three integers ``(mx, my, mz)``; only ``mx`` is used here.
        ndev: Number of ``dnx_list`` entries per group.
        dnx_list: Integer sizes along x, ``ndev`` per group.
        ny_list: Integer sizes along y.
        nz_list: Integer sizes along z.

    Returns:
        A dict with keys ``'x'``, ``'y'``, ``'z'`` and ``'dx'``.  Each value is
        ``np.add.accumulate`` of a leading 0 followed by, respectively, the
        per-group x sizes, ``ny_list``, ``nz_list`` and the stripped
        per-device x sizes.
    """
    common.check_type('mpi_shape', mpi_shape, (tuple, list), int)
    common.check_type('ndev', ndev, int)
    common.check_type('dnx_list', dnx_list, (tuple, list), int)
    common.check_type('ny_list', ny_list, (tuple, list), int)
    common.check_type('nz_list', nz_list, (tuple, list), int)

    mx, _, _ = mpi_shape

    snx_list = []
    strip_dnx_list = []
    for mi in range(mx):
        sub_dnx_list = dnx_list[mi * ndev:(mi + 1) * ndev]
        # NOTE(review): presumably neighbouring device sub-grids share one
        # x-plane, hence the "- ndev + 1" -- confirm.
        snx_list.append(sum(sub_dnx_list) - ndev + 1)
        # Drop one from every entry, then give it back to the last entry of
        # the group.
        strip_dnx_list.extend(nx - 1 for nx in sub_dnx_list)
        strip_dnx_list[-1] += 1

    # list(...) so that tuple inputs, which the type checks name as
    # acceptable, do not fail on list + tuple concatenation.
    return {
        'x': np.add.accumulate([0] + snx_list),
        'y': np.add.accumulate([0] + list(ny_list)),
        'z': np.add.accumulate([0] + list(nz_list)),
        'dx': np.add.accumulate([0] + strip_dnx_list),
    }
def accum_sub_ns_dict(mpi_shape, ndev, dnx_list, ny_list, nz_list):
    """Build cumulative-sum offset arrays for the x, y, z and per-device x sizes.

    ``dnx_list`` is consumed in consecutive groups of ``ndev`` entries, one
    group per index along the first ``mpi_shape`` entry (``mx``).

    Args:
        mpi_shape: Three integers ``(mx, my, mz)``; only ``mx`` is used here.
        ndev: Number of ``dnx_list`` entries per group.
        dnx_list: Integer sizes along x, ``ndev`` per group.
        ny_list: Integer sizes along y.
        nz_list: Integer sizes along z.

    Returns:
        A dict with keys ``'x'``, ``'y'``, ``'z'`` and ``'dx'``.  Each value is
        ``np.add.accumulate`` of a leading 0 followed by, respectively, the
        per-group x sizes, ``ny_list``, ``nz_list`` and the stripped
        per-device x sizes.
    """
    common.check_type('mpi_shape', mpi_shape, (tuple, list), int)
    common.check_type('ndev', ndev, int)
    common.check_type('dnx_list', dnx_list, (tuple, list), int)
    common.check_type('ny_list', ny_list, (tuple, list), int)
    common.check_type('nz_list', nz_list, (tuple, list), int)

    mx, _, _ = mpi_shape

    snx_list = []
    strip_dnx_list = []
    for mi in range(mx):
        sub_dnx_list = dnx_list[mi * ndev:(mi + 1) * ndev]
        # NOTE(review): presumably neighbouring device sub-grids share one
        # x-plane, hence the "- ndev + 1" -- confirm.
        snx_list.append(sum(sub_dnx_list) - ndev + 1)
        # Drop one from every entry, then give it back to the last entry of
        # the group.
        strip_dnx_list.extend(nx - 1 for nx in sub_dnx_list)
        strip_dnx_list[-1] += 1

    # list(...) so that tuple inputs, which the type checks name as
    # acceptable, do not fail on list + tuple concatenation.
    return {
        'x': np.add.accumulate([0] + snx_list),
        'y': np.add.accumulate([0] + list(ny_list)),
        'z': np.add.accumulate([0] + list(nz_list)),
        'dx': np.add.accumulate([0] + strip_dnx_list),
    }
def check_mpi_shape(size, mpi_shape):
    """Verify that ``size`` equals the product of the entries of ``mpi_shape``.

    Args:
        size: Total number of MPI processes.
        mpi_shape: List or tuple of integers describing the process grid.

    Raises:
        AssertionError: If ``size`` differs from the product of ``mpi_shape``.
    """
    # Imported locally: ``reduce`` is a builtin only on Python 2, while
    # ``functools.reduce`` exists on both major versions.
    from functools import reduce
    from operator import mul

    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if size != reduce(mul, mpi_shape):
        raise AssertionError(
            'MPI size %d is not matched with the mpi_shape %s.'
            % (size, repr(mpi_shape)))
def my_coord(rank, mpi_shape):
    """Convert a linear rank into its ``[x, y, z]`` process-grid coordinate.

    The x index varies fastest: ``rank == x + y * mx + z * mx * my``.

    Args:
        rank: Integer rank.
        mpi_shape: Three integers ``(mx, my, mz)``.

    Returns:
        A list ``[x, y, z]`` of integer coordinates.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    mx, my, _ = mpi_shape
    # Floor division keeps the coordinates integral even where ``/`` means
    # true division (Python 3, or ``from __future__ import division``).
    return [rank % mx, rank // mx % my, rank // (mx * my)]
def lda_analysis(load_model, lda_model_type, data_folder, results_folder, csv_file_name, mallet_download_folder):
    """Run the LDA topic pipeline over a CSV of texts and save the results.

    Steps: download resources, read the CSV, preprocess the texts, build the
    dictionary and bag-of-words corpus, obtain a model (trained or loaded),
    assign a dominant topic to every text, write an Excel sheet and plot the
    label distributions by topic.

    Args:
        load_model: When truthy, load the saved model from ``results_folder``
            instead of searching for an optimal one.
        lda_model_type: ``'mallet'`` or ``'lda'``.
        data_folder: Folder containing ``csv_file_name``.
        results_folder: Folder for the model file, the Excel sheet and plots.
        csv_file_name: CSV with ``text``, ``tonality`` and ``toxicity`` columns.
        mallet_download_folder: Folder where mallet-2.0.8 lives or is unpacked.
    """
    print("\nLDA analysis")
    check_type(lda_model_type, ['mallet', 'lda'], 'lda model')

    # Downloads
    print('\nDownloads')
    nltk.download('stopwords')
    mallet_home = os.path.join(mallet_download_folder, 'mallet-2.0.8')
    if not os.path.exists(mallet_home):
        dload.save_unzip("http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip", mallet_download_folder)
    mallet_path = os.path.join(mallet_home, 'bin', 'mallet')
    os.environ['MALLET_HOME'] = mallet_home

    # Load data.  Labels are used as stored in the CSV (a change_class_label
    # remap of tonality was left disabled here).
    data = pd.read_csv(os.path.join(data_folder, csv_file_name))
    texts_original, tonality, toxicity = (
        data[column].values.tolist()
        for column in ('text', 'tonality', 'toxicity')
    )

    # Preprocess texts, then build the dictionary and term-document frequency
    texts_processed = preprocessing(texts_original)
    id2word = corpora.Dictionary(texts_processed)
    corpus = [id2word.doc2bow(tokens) for tokens in texts_processed]

    # Get optimal model, or load the stored one
    if load_model:
        stored_model = os.path.join(results_folder, lda_model_type + '_model.bin')
        model = load_lda_model(model_path=stored_model)
    else:
        model = get_optimal_model(results_folder=results_folder,
                                  corpus=corpus,
                                  id2word=id2word,
                                  lda_model_type=lda_model_type,
                                  texts=texts_processed,
                                  mallet_path=mallet_path)
        stored_model = os.path.join(results_folder, lda_model_type + '_model.bin')
        save_lda_model(lda_model=model, save_path=stored_model)

    # Find dominant topic in each text
    topic_nums, topic_keywords = get_dominant_topic_df(lda_model=model,
                                                       model_type=lda_model_type,
                                                       corpus=corpus,
                                                       texts=texts_original)

    # Save to excel-file
    df_result = pd.DataFrame({
        'texts': texts_original,
        'tonality': tonality,
        'toxicity': toxicity,
        'dominant_topic': topic_nums,
        'topic_keywords': topic_keywords,
    })
    excel_path = os.path.join(results_folder, 'results_' + lda_model_type + '.xlsx')
    df_result.to_excel(excel_path, index=False)

    # Distribution of tonality and toxicity by topics
    for label_name in ('tonality', 'toxicity'):
        plot_label_by_topic(df=df_result,
                            label_name=label_name,
                            model_type=lda_model_type,
                            results_folder=results_folder)
def my_coord(rank, mpi_shape):
    """Convert a linear rank into its ``[x, y, z]`` process-grid coordinate.

    The x index varies fastest: ``rank == x + y * mx + z * mx * my``.

    Args:
        rank: Integer rank.
        mpi_shape: Three integers ``(mx, my, mz)``.

    Returns:
        A list ``[x, y, z]`` of integer coordinates.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    mx, my, _ = mpi_shape
    # Floor division keeps the coordinates integral even where ``/`` means
    # true division (Python 3, or ``from __future__ import division``).
    return [rank % mx, rank // mx % my, rank // (mx * my)]