Exemple #1
0
def mpi_target_dict(rank, mpi_shape, pbc_axes):
    """
    Return the ranks of the six face neighbours of `rank` in the MPI grid.

    rank      -- rank whose neighbours are looked up
    mpi_shape -- three-element list or tuple (mx, my, mz)
    pbc_axes  -- string; an axis letter ('x', 'y' or 'z') contained in it
                 makes that axis wrap around at its ends

    The returned dict maps 'x-', 'x+', 'y-', 'y+', 'z-' and 'z+' to the rank
    of the neighbour in that direction, or to None when there is none.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)

    # mz is not needed by the rank formula; the unpacking still enforces that
    # mpi_shape has exactly three entries.
    mx, my, mz = mpi_shape
    targets = {
        'x-': None, 'x+': None,
        'y-': None, 'y+': None,
        'z-': None, 'z+': None}

    mycoord = my_coord(rank, mpi_shape)

    def replace(i, val):
        # Copy of this rank's coordinate with component i set to val.
        return mycoord[:i] + [val] + mycoord[i + 1:]

    def coord_to_rank(coord):
        # Explicit unpacking: tuple parameters (`lambda (i, j, k): ...`) are a
        # syntax error on Python 3.
        i, j, k = coord
        return i + j * mx + k * mx * my

    for i, axis in enumerate(['x', 'y', 'z']):
        val = mycoord[i]
        ms = mpi_shape[i]
        # An axis with a single process never wraps onto itself.
        wraps = axis in pbc_axes and ms != 1

        if val > 0:
            targets['%s-' % axis] = coord_to_rank(replace(i, val - 1))
        elif val == 0 and wraps:
            targets['%s-' % axis] = coord_to_rank(replace(i, ms - 1))

        if val < ms - 1:
            targets['%s+' % axis] = coord_to_rank(replace(i, val + 1))
        elif val == ms - 1 and wraps:
            targets['%s+' % axis] = coord_to_rank(replace(i, 0))

    return targets
Exemple #2
0
def sentiment_analysis(load_model, label_type, embs_convert_type,
                       label_type_folder, target_data_folder, save_folder):
    """Predict labels for the target data with an LSTM model.

    When ``load_model`` is truthy the model is loaded from ``save_folder``;
    otherwise the source data is loaded, a model is trained on it and
    evaluated on the source test split first.

    Returns the value of ``predict`` for the target data.
    """
    check_type(label_type,
               types_list=['tonality', 'toxicity'],
               type_name='label')
    check_type(embs_convert_type,
               types_list=['mean', 'length_64'],
               type_name='embeddings convert')

    # The target data is needed on both paths, so it is loaded up front.
    target_data = load_target_data(label_type=label_type,
                                   convert_type=embs_convert_type,
                                   data_folder=target_data_folder)
    x_target, y_target = target_data

    if load_model:
        model = load_lstm(label_type=label_type,
                          convert_type=embs_convert_type,
                          folder=save_folder)
    else:
        source_data = load_source_data(label_type=label_type,
                                       label_data_folder=label_type_folder,
                                       convert_type=embs_convert_type)
        x_source, x_source_test, y_source, y_source_test = source_data

        model = train_lstm(x_source=x_source,
                           y_source=y_source,
                           label_type=label_type,
                           convert_type=embs_convert_type,
                           save_folder=save_folder,
                           epochs=5)
        # Evaluate the freshly trained model on the held-out source split.
        predict(model=model, x=x_source_test, y=y_source_test, title='Source')

    return predict(model=model, x=x_target, y=y_target, title='Target')
def mpi_target_dict(rank, mpi_shape, pbc_axes):
    """
    Look up the neighbouring ranks of `rank` along each axis of the MPI grid.

    rank      -- rank whose neighbours are looked up
    mpi_shape -- three-element list or tuple (mx, my, mz)
    pbc_axes  -- string of axis letters ('x', 'y', 'z'); a listed axis wraps
                 around at its first and last position

    Returns a dict with the keys 'x-', 'x+', 'y-', 'y+', 'z-', 'z+'.  Each
    value is the neighbour's rank, or None if there is no neighbour.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)
    common.check_type('pbc_axes', pbc_axes, str)

    # Only mx and my enter the rank formula; unpacking all three keeps the
    # length check on mpi_shape.
    mx, my, mz = mpi_shape
    neighbours = {
        'x-': None, 'x+': None,
        'y-': None, 'y+': None,
        'z-': None, 'z+': None}

    mycoord = my_coord(rank, mpi_shape)

    def rank_with(i, val):
        # Rank of the coordinate equal to mycoord except component i == val.
        # Written as a nested function because the original
        # `lambda (i, j, k): ...` form does not parse on Python 3.
        ci, cj, ck = mycoord[:i] + [val] + mycoord[i+1:]
        return ci + cj*mx + ck*mx*my

    for i, axis in enumerate('xyz'):
        val = mycoord[i]
        ms = mpi_shape[i]
        minus, plus = '%s-' % axis, '%s+' % axis

        if val > 0:
            neighbours[minus] = rank_with(i, val-1)
        elif val == 0 and axis in pbc_axes and ms != 1:
            # Wrap to the far end; skipped when the axis holds one process.
            neighbours[minus] = rank_with(i, ms-1)

        if val < ms-1:
            neighbours[plus] = rank_with(i, val+1)
        elif val == ms-1 and axis in pbc_axes and ms != 1:
            neighbours[plus] = rank_with(i, 0)

    return neighbours
def macro_replace_list(pt0, pt1):
    """
    Build the replacement strings for the macros of the kernel template.

    This is used to generate the cuda kernel from the template.

    pt0, pt1 -- three-element (x, y, z) integer points

    Returns [nmax, xid, yid, zid] as strings.  nmax is the number of points
    spanned by pt0 and pt1.  Each id is the plain pt0 coordinate when both
    points agree on that axis, otherwise an expression in terms of `gid`.
    """

    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1

    offsets = [x0, y0, z0]
    ends = [x1, y1, z1]
    sizes = [abs(end - start) + 1 for start, end in zip(offsets, ends)]
    nmax = sizes[0] * sizes[1] * sizes[2]

    # Axes on which the two points differ, in x, y, z order.
    varying = [axis for axis in range(3) if offsets[axis] != ends[axis]]
    last = len(varying) - 1

    ids = list(offsets)
    stride = 1  # product of the sizes of the varying axes after this one
    for pos in reversed(range(len(varying))):
        axis = varying[pos]
        size, offset = sizes[axis], offsets[axis]

        if last == 0:
            # Only one axis varies: gid indexes it directly.
            ids[axis] = '(gid + %d)' % offset
        elif pos == last:
            ids[axis] = '(gid%%%d + %d)' % (size, offset)
        elif pos == 0:
            ids[axis] = '(gid/%d + %d)' % (stride, offset)
        else:
            ids[axis] = '((gid/%d)%%%d + %d)' % (stride, size, offset)

        stride *= size

    return [str(nmax)] + [str(item) for item in ids]
def macro_replace_list(pt0, pt1):
    """
    Build the replacement strings for the macros of the kernel template.

    This is used to generate the opencl kernel from the template.

    pt0, pt1 -- three-element (x, y, z) integer points

    Returns the list [nmax, xid, yid, zid] of strings: the number of points
    spanned by the two points, followed by one entry per axis that is either
    the pt0 coordinate (axis fixed) or an expression of `gid` (axis varies).
    """

    common.check_type('pt0', pt0, (list, tuple), int)
    common.check_type('pt1', pt1, (list, tuple), int)

    x0, y0, z0 = pt0
    x1, y1, z1 = pt1

    snx = abs(x1 - x0) + 1
    sny = abs(y1 - y0) + 1
    snz = abs(z1 - z0) + 1

    # Key: (x varies, y varies, z varies) -> (xid, yid, zid)
    table = {
        (False, False, False): (x0, y0, z0),
        (True, False, False): ('(gid + %d)' % x0, y0, z0),
        (False, True, False): (x0, '(gid + %d)' % y0, z0),
        (False, False, True): (x0, y0, '(gid + %d)' % z0),
        (True, True, False): ('(gid/%d + %d)' % (sny, x0),
                              '(gid%%%d + %d)' % (sny, y0),
                              z0),
        (False, True, True): (x0,
                              '(gid/%d + %d)' % (snz, y0),
                              '(gid%%%d + %d)' % (snz, z0)),
        (True, False, True): ('(gid/%d + %d)' % (snz, x0),
                              y0,
                              '(gid%%%d + %d)' % (snz, z0)),
        (True, True, True): ('(gid/%d + %d)' % (sny * snz, x0),
                             '((gid/%d)%%%d + %d)' % (snz, sny, y0),
                             '(gid%%%d + %d)' % (snz, z0)),
    }

    chosen = table[(x0 != x1, y0 != y1, z0 != z1)]

    result = [str(snx * sny * snz)]
    result.extend(str(item) for item in chosen)
    return result
Exemple #6
0
def accum_sub_ns_dict(mpi_shape, ndev, dnx_list, ny_list, nz_list):
    """
    Return cumulative-sum arrays, each starting at 0, for the sub-grid sizes.

    mpi_shape -- three-element list or tuple (mx, my, mz); only mx is used
    ndev      -- number of consecutive dnx_list entries that form one group
    dnx_list  -- list or tuple of int, read in mx groups of ndev entries
    ny_list   -- list or tuple of int
    nz_list   -- list or tuple of int

    The returned dict holds numpy arrays under these keys:
      'x'  -- accumulated per-group sizes, each being sum(group) - ndev + 1
      'y'  -- accumulated ny_list
      'z'  -- accumulated nz_list
      'dx' -- accumulated per-entry sizes, each reduced by one except the
              last entry of every group
    """
    common.check_type('mpi_shape', mpi_shape, (tuple, list), int)
    common.check_type('ndev', ndev, int)
    common.check_type('dnx_list', dnx_list, (tuple, list), int)
    common.check_type('ny_list', ny_list, (tuple, list), int)
    common.check_type('nz_list', nz_list, (tuple, list), int)

    mx, my, mz = mpi_shape

    snx_list = []
    strip_dnx_list = []
    # range() instead of xrange(): works on Python 2 and Python 3.
    for mi in range(mx):
        sub_dnx_list = dnx_list[mi * ndev:(mi + 1) * ndev]
        snx_list.append(sum(sub_dnx_list) - ndev + 1)
        strip_dnx_list.extend([nx - 1 for nx in sub_dnx_list])
        # The last entry of each group keeps its full size.
        strip_dnx_list[-1] += 1

    # list(...) so that tuple arguments can be concatenated with [0]
    # instead of raising TypeError.
    return {
        'x': np.add.accumulate([0] + snx_list),
        'y': np.add.accumulate([0] + list(ny_list)),
        'z': np.add.accumulate([0] + list(nz_list)),
        'dx': np.add.accumulate([0] + strip_dnx_list)}
def accum_sub_ns_dict(mpi_shape, ndev, dnx_list, ny_list, nz_list):
    """
    Accumulate the sub-grid sizes along each axis, starting every array at 0.

    mpi_shape -- three-element list or tuple (mx, my, mz); only mx is used
    ndev      -- length of each group of consecutive dnx_list entries
    dnx_list  -- list or tuple of int, consumed as mx groups of ndev entries
    ny_list   -- list or tuple of int
    nz_list   -- list or tuple of int

    Returns a dict of numpy arrays:
      'x'  -- running sum of sum(group) - ndev + 1 over the groups
      'y'  -- running sum of ny_list
      'z'  -- running sum of nz_list
      'dx' -- running sum of the dnx_list entries, each lowered by one
              except the last entry of every group
    """
    common.check_type('mpi_shape', mpi_shape, (tuple, list), int)
    common.check_type('ndev', ndev, int)
    common.check_type('dnx_list', dnx_list, (tuple, list), int)
    common.check_type('ny_list', ny_list, (tuple, list), int)
    common.check_type('nz_list', nz_list, (tuple, list), int)

    mx, my, mz = mpi_shape

    snx_list = []
    strip_dnx_list = []
    # xrange() is gone on Python 3; range() behaves the same for this loop.
    for mi in range(mx):
        sub_dnx_list = dnx_list[mi*ndev:(mi+1)*ndev]
        snx_list.append(sum(sub_dnx_list) - ndev + 1)
        strip_dnx_list.extend([nx-1 for nx in sub_dnx_list])
        # Undo the decrement for the final entry of the group.
        strip_dnx_list[-1] += 1

    # Tuples cannot be added to a list directly, so convert them first.
    y_sizes = [0] + list(ny_list)
    z_sizes = [0] + list(nz_list)

    return {
        'x': np.add.accumulate([0] + snx_list),
        'y': np.add.accumulate(y_sizes),
        'z': np.add.accumulate(z_sizes),
        'dx': np.add.accumulate([0] + strip_dnx_list)}
Exemple #8
0
def check_mpi_shape(size, mpi_shape):
    """
    Assert that `size` equals the product of the entries of `mpi_shape`.

    size      -- number of MPI processes
    mpi_shape -- list or tuple of int

    Raises AssertionError when the two do not match.
    """
    # reduce() is a builtin only on Python 2; functools provides it on both
    # Python 2.6+ and Python 3.
    from functools import reduce

    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    # NOTE(review): this assert is skipped when Python runs with -O.
    assert size == reduce(lambda x, y: x*y, mpi_shape), \
            'MPI size %d is not matched with the mpi_shape %s.' % (size, repr(mpi_shape))
Exemple #9
0
def my_coord(rank, mpi_shape):
    """
    Return the [i, j, k] grid coordinate of `rank`.

    rank      -- integer rank
    mpi_shape -- three-element list or tuple (mx, my, mz)

    The coordinate satisfies rank == i + j * mx + k * mx * my for ranks
    inside the grid.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    # mz is not needed; the unpacking still enforces three entries.
    mx, my, mz = mpi_shape
    # Floor division keeps the coordinates integral on Python 3 as well,
    # where `/` is true division and would produce floats.
    return [rank % mx, rank // mx % my, rank // (mx * my)]
Exemple #10
0
def lda_analysis(load_model, lda_model_type, data_folder, results_folder,
                 csv_file_name, mallet_download_folder):
    """Run the LDA topic analysis and write its results.

    The function fetches the NLTK stopwords and, if its folder is missing,
    the MALLET distribution.  It reads the csv file, preprocesses the texts
    and either builds and saves a model or loads a saved one, depending on
    ``load_model``.  It then writes the dominant topic of every text to an
    Excel file in ``results_folder`` and plots the tonality and toxicity
    labels by topic.
    """
    print("\nLDA analysis")
    check_type(lda_model_type, ['mallet', 'lda'], 'lda model')

    # Downloads
    print('\nDownloads')
    nltk.download('stopwords')
    mallet_home = os.path.join(mallet_download_folder, 'mallet-2.0.8')
    if not os.path.exists(mallet_home):
        dload.save_unzip("http://mallet.cs.umass.edu/dist/mallet-2.0.8.zip",
                         mallet_download_folder)
    mallet_path = os.path.join(mallet_home, 'bin', 'mallet')
    os.environ['MALLET_HOME'] = mallet_home

    # Input data
    frame = pd.read_csv(os.path.join(data_folder, csv_file_name))
    texts_original = frame['text'].values.tolist()
    tonality = frame['tonality'].values.tolist()
    toxicity = frame['toxicity'].values.tolist()

    # Tokenised texts, dictionary and bag-of-words corpus
    texts_processed = preprocessing(texts_original)
    id2word = corpora.Dictionary(texts_processed)
    corpus = [id2word.doc2bow(tokens) for tokens in texts_processed]

    # Build the model, or reuse the one saved in results_folder
    model_file = os.path.join(results_folder, lda_model_type + '_model.bin')
    if load_model:
        model = load_lda_model(model_path=model_file)
    else:
        model = get_optimal_model(results_folder=results_folder,
                                  corpus=corpus,
                                  id2word=id2word,
                                  lda_model_type=lda_model_type,
                                  texts=texts_processed,
                                  mallet_path=mallet_path)
        save_lda_model(lda_model=model, save_path=model_file)

    # Dominant topic of every text
    topic_nums, topic_keywords = get_dominant_topic_df(
        lda_model=model,
        model_type=lda_model_type,
        corpus=corpus,
        texts=texts_original)

    # Excel report
    df_result = pd.DataFrame({
        'texts': texts_original,
        'tonality': tonality,
        'toxicity': toxicity,
        'dominant_topic': topic_nums,
        'topic_keywords': topic_keywords
    })
    report_file = os.path.join(results_folder,
                               'results_' + lda_model_type + '.xlsx')
    df_result.to_excel(report_file, index=False)

    # Distribution of each label by topic
    for label_name in ('tonality', 'toxicity'):
        plot_label_by_topic(df=df_result,
                            label_name=label_name,
                            model_type=lda_model_type,
                            results_folder=results_folder)
Exemple #11
0
def check_mpi_shape(size, mpi_shape):
    """
    Check that the MPI size matches the process grid shape.

    size      -- number of MPI processes
    mpi_shape -- list or tuple of int whose product must equal `size`

    Raises AssertionError on a mismatch.
    """
    # Imported locally: reduce() stopped being a builtin on Python 3, while
    # functools.reduce exists on Python 2.6+ and Python 3 alike.
    from functools import reduce

    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    expected = reduce(lambda x, y: x*y, mpi_shape)
    # NOTE(review): asserts are stripped under -O, which disables this check.
    assert size == expected, \
            'MPI size %d is not matched with the mpi_shape %s.' % (size, repr(mpi_shape))
Exemple #12
0
def my_coord(rank, mpi_shape):
    """
    Convert `rank` into its [i, j, k] coordinate in the process grid.

    rank      -- integer rank
    mpi_shape -- three-element list or tuple (mx, my, mz)

    For ranks inside the grid the result satisfies
    rank == i + j*mx + k*mx*my.
    """
    common.check_type('mpi_shape', mpi_shape, (list, tuple), int)

    # Unpacking all three entries keeps the length check; mz itself is unused.
    mx, my, mz = mpi_shape
    # `//` rather than `/`: true division on Python 3 would return floats.
    return [rank%mx, rank//mx%my, rank//(mx*my)]