def ioput_baseline_predict(ff):
    with open(ff, 'r') as f:
        text = f.read().splitlines()
    output_yaml = {}
    for i, orig_text in enumerate(text):
        rp = RParser()
        # Preprocess
        pre_p = PreProcessor()
        process_text, sub_table = pre_p.process([orig_text.strip()])
        sen = process_text[0]
        # RParse
        verb_parent, method_parent = rp.parse_v_method(sen)
        ioput_phrases = rp.parse_input_output(sen, verb_parent, method_parent)
        NPs = rp.return_NPs(sen)
        # Postprocess
        # post_p = PostProcessor(verb_parent, method_parent, ioput_phrases, orig_text.strip(), sub_table)
        # summary = post_p.process()
        summary = {}
        summary['action1'] = {}
        for ioput_seq_num, np in enumerate(NPs):
            summary['action1']['input_output_{}'.format(ioput_seq_num + 1)] = repr(np)
        output_yaml['sen{}'.format(i + 1)] = summary
    file_path = ff.split('/')
    output_file_name_list = file_path[-1].split('.')
    output_file_name_list[1] = 'ioput_baseline'
    output_file_name_list[2] = 'yaml'
    file_path[-1] = '.'.join(output_file_name_list)
    output_file_path = '/'.join(file_path)
    with open(output_file_path, 'w') as f:
        f.write(yaml.dump(output_yaml, default_flow_style=False))
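For illustration only (not from the original source), this is how the filename rewrite at the end of ioput_baseline_predict() behaves, assuming the "<name>.raw.txt" naming convention used by the other snippets in this collection.

# Hypothetical example of the output-path rewrite above.
ff = 'data/verb_method_arg/test/paper0105.raw.txt'
file_path = ff.split('/')
output_file_name_list = file_path[-1].split('.')   # ['paper0105', 'raw', 'txt']
output_file_name_list[1] = 'ioput_baseline'
output_file_name_list[2] = 'yaml'
file_path[-1] = '.'.join(output_file_name_list)
print('/'.join(file_path))  # data/verb_method_arg/test/paper0105.ioput_baseline.yaml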
def pre_process(self):
    pre_processor = PreProcessor(self.path, pickle_arb_id_filename, pickle_j1979_filename, self.use_j1979)
    pid_dictionary = pre_processor.import_pid_dict(pid_file)
    id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
        a_timer,
        tang_normalize_strategy,
        pid_dictionary,
        time_conversion,
        freq_analysis_accuracy,
        freq_synchronous_threshold,
        force_pre_processing)
    if dump_to_pickle:
        if force_pre_processing:
            if path.isfile(pickle_arb_id_filename):
                remove(pickle_arb_id_filename)
            if path.isfile(pickle_j1979_filename):
                remove(pickle_j1979_filename)
        # Lexical analysis will add additional information to the Arb ID dict. Don't dump if you're going to
        # immediately delete and replace pickle_arb_id_filename during Lexical Analysis.
        if not force_lexical_analysis:
            if not path.isfile(pickle_arb_id_filename) and id_dictionary:
                print("\nDumping arb ID dictionary for " + self.output_vehicle_dir + " to " + pickle_arb_id_filename)
                dump(id_dictionary, open(pickle_arb_id_filename, "wb"))
                print("\tComplete...")
            if not path.isfile(pickle_j1979_filename) and j1979_dictionary:
                print("\nDumping J1979 dictionary for " + self.output_vehicle_dir + " to " + pickle_j1979_filename)
                dump(j1979_dictionary, open(pickle_j1979_filename, "wb"))
                print("\tComplete...")
    return id_dictionary, j1979_dictionary, pid_dictionary
def jpg_to_emoji(original_image, work_location, output_file, company_name,
                 emojis_in_width, emoji_size, do_preprocessing, use_kd_tree):
    if do_preprocessing:
        PreProcessor.preprocess_emojis(work_location)
    emoji_list = get_filtered_emoji_list(work_location, company_name)
    valid_emojis_dict = {"emojis": emoji_list}
    emoji_mapper = EmojiMapper.EmojiMapper(emoji_dict=valid_emojis_dict, use_kd_tree=use_kd_tree)

    t_before_getting_grid = time.time()
    emoji_grid = image_to_emoji_grid(misc.imread(original_image), emojis_in_width, emoji_mapper)
    t_after_getting_grid = time.time()
    print("Time to Get Emoji grid: %s" % str(t_after_getting_grid - t_before_getting_grid))

    if output_file.endswith(".html"):
        # Write into local html file.
        emoji_grid_to_html_file(emoji_grid, output_file, emoji_size)
    elif output_file.endswith(".png"):
        # Write to png
        image_array = emoji_grid_to_image(emoji_grid, emoji_size)
        misc.imsave(output_file, image_array)
    else:
        print("UnknownFileName")

    t_after_getting_output_file = time.time()
    print("Time to Build output file: %s" % str(t_after_getting_output_file - t_after_getting_grid))
def test_downsample(self):
    # Create a preprocessor object and define a test greyscale array (8-bit)
    preprocess = PreProcessor()
    testGreyFrame = np.random.randint(255, size=(210, 160))

    # Use the PP to halve the size of the input
    halfFrame = preprocess.halfDownsample(testGreyFrame)

    # Ensure the frame has reduced in size by half
    self.assertEqual(testGreyFrame.shape[0] / 2, halfFrame.shape[0])
    self.assertEqual(testGreyFrame.shape[1] / 2, halfFrame.shape[1])
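For reference, a minimal sketch of what the halfDownsample method under test could look like; this is an assumption based only on the shape assertions above, not the project's actual implementation.

def half_downsample_sketch(frame):
    # Hypothetical stand-in for PreProcessor.halfDownsample: keep every second
    # row and column, which halves both dimensions exactly as the test asserts.
    return frame[::2, ::2]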
def transPrepare(self):
    ctcaeinfo = AeConverter.ctcaeInfo(self)
    valuedic = AeConverter.lrInfo(self)
    rules = ctcaeinfo['rules']
    paraunit = ctcaeinfo['unit']
    preparation = PreProcessor(self.dbConnection, valuedic, rules, paraunit, self.unittrans)
    rulesTransfered = preparation.rulesPrepare()
    return rulesTransfered
    # the format of this list is like
    # [ { rule_grade1: 'a string', rule_grade2: 'a string', ... rule_grade5: 'a string'},
def run():
    data_loader = DataLoader(data_dir_root, data_train_file, data_test_file)
    raw_train_df, raw_test_df = data_loader.load_csv_data()
    data_loader.print_statistics()

    pre_processor = PreProcessor(
        raw_train_df,
        raw_test_df,
        cols_to_consider=cols_to_consider,
        # cols_to_consider=raw_train_df.columns[0:-1],
        target_feature='SalePrice')
    pre_processor.pre_process_data()

    print_features_info(pre_processor.raw_train_df, pre_processor.clean_train_df)
    plot_target_feature(pre_processor.raw_train_df, pre_processor.target_feature)
    plot_features_hist(pre_processor.raw_train_df)
    plot_correlation_numeric_features(pre_processor.clean_train_df)

    train_X, train_y = prepare_data(
        pre_processor.clean_train_df,
        class_col=pre_processor.target_feature,
        reg_encoding_features=[],
        one_hot_encoding_features=one_hot_encod_features,
        ordinal_encoding_features=features_ordinal_mappings,
        no_enc_features=no_enc_features)
    test_X, test_y = prepare_data(
        pre_processor.clean_test_df,
        class_col=pre_processor.target_feature,
        reg_encoding_features=[],
        one_hot_encoding_features=one_hot_encod_features,
        ordinal_encoding_features=features_ordinal_mappings,
        no_enc_features=no_enc_features)

    evaluator = Predictor(train_X, train_y, test_X, test_y, eval_classifiers, eval_classifiers_params_grid)
    all_predictions, final_prediction = evaluator.build_models(grid_search=False)
    evaluation_df = evaluator.save_predictions_to_df(all_predictions, final_prediction)
    submission_df = evaluator.save_predictions_for_submission(evaluation_df, id_col=pre_processor.raw_test_df['Id'])
    evaluation_df.to_csv("test_evaluation_results.csv", index=False)
    submission_df.to_csv("test_submission.csv", index=False)
def test_greyscale(self):
    # Create a preprocessor object and define a test RGB array (8-bit)
    preprocess = PreProcessor()
    testObservation = np.random.randint(255, size=(210, 160, 3))

    # Use the PP to convert to greyscale
    greyTest = preprocess.toGreyScale(testObservation)

    # Ensure the dimensionality has reduced
    self.assertEqual(greyTest.shape, (210, 160))

    # Ensure the greyscale has been applied correctly
    expectedGreyValue = int(
        (testObservation[0, 0, 0] + testObservation[0, 0, 1] + testObservation[0, 0, 2]) / 3)
    self.assertEqual(expectedGreyValue, greyTest[0, 0])
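Similarly, a hedged sketch of a toGreyScale implementation that would satisfy the assertions above (integer mean of the three colour channels); the real method may differ.

import numpy as np

def to_greyscale_sketch(observation):
    # Hypothetical stand-in for PreProcessor.toGreyScale: average the three
    # channels per pixel, truncated to an integer, matching expectedGreyValue.
    return (observation.sum(axis=2) // 3).astype(np.uint8)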
def main():
    dirList = list()
    temp = listdir("./data/Asthma/2010/")
    dirList.append(temp)
    temp = listdir("./data/Asthma/2011/")
    dirList.append(temp)
    temp = listdir("./data/Asthma/2012/")
    dirList.append(temp)
    temp = listdir("./data/Asthma/2013/")
    dirList.append(temp)
    temp = listdir("./data/Asthma/2014/")
    dirList.append(temp)

    pre = PreProcessor()
    clf = SVM(kernel=GaussianKernel(5.0), C=1.0)
    X_train, y_train = pre.loadTrainingSet("training_data/Asthma_Sample_Tokenized.csv")
    clf.fit(X_train, y_train)
def __init__(self, trainDF):
    super().__init__()
    prePro = PreProcessor()
    self.pf = PlotFunctions()
    self.trainDF = trainDF
    self.X_train, self.X_test, self.y_train, self.y_test = \
        prePro.split_train_test(trainDF['cleaned_sentence'], trainDF['classification'], 0.4)
    self.X_test, self.X_cross, self.y_test, self.y_cross = \
        prePro.split_train_test(self.X_test, self.y_test, 0.5)
    self.all_scores = list()
    self.models = {
        'MultinomialNB': naive_bayes.MultinomialNB(alpha=0.767, class_prior=None, fit_prior=True),
        'ComplementNB': naive_bayes.ComplementNB(alpha=0.767, class_prior=None, fit_prior=True),
        'LogisticRegression': linear_model.LogisticRegression(solver='lbfgs')
    }
def main():
    preprocesser = PreProcessor()
    mm = ModuleManager()

    def generate_random_points_on_hyperellipsoid(vol_data, cor_data,
                                                 alpha_vec=np.array([0.9, 0.95, 0.975, 0.99]),
                                                 n_sample=int(1e4), dim=30):
        header = alpha_vec
        result = pd.DataFrame(columns=header)
        for i in range(vol_data.shape[0]):
            start_time = time.time()
            var_estimates = []
            vol_mat = np.diag(vol_data.iloc[i, :])
            cor_mat = preprocesser.construct_correlation_matrix(corr_vec=cor_data.iloc[i, :], n=dim)
            H = preprocesser.construct_covariance_matrix(vol_matrix=vol_mat, corr_matrix=cor_mat)
            r = np.random.randn(H.shape[0], n_sample)
            # u contains random points on the unit hypersphere
            u = r / np.linalg.norm(r, axis=0)
            for alpha in alpha_vec:
                y = np.sqrt(chi2.ppf(q=alpha, df=dim))
                # Transform points on the unit hypersphere to the hyperellipsoid
                xrandom = sqrtm(H).dot(np.sqrt(y) * u)
                # Compute the lowest (equally) weighted average of random points on the hyperellipsoid.
                # This is the maximum loss with alpha percent probability, i.e. Value-at-Risk
                xrandom_min = np.max(np.abs(np.array([np.mean(x) for x in xrandom.T])))
                var_estimates.append(xrandom_min)
            result = pd.merge(result,
                              pd.DataFrame(np.asarray(var_estimates).reshape(1, -1), columns=header),
                              how='outer')
            print((i, time.time() - start_time))
        return result

    ##################################################################################################################
    ### Multivariate Quantile Computation ###
    ##################################################################################################################
    dim = 30
    vol_data = mm.load_data('multivariate_analysis/volatilities_garch_norm_DJI30_2000_2001.pkl')
    #cor_data = mm.load_data('multivariate_analysis/cor_DCC_mvnorm_DJI30_1994_1995.pkl')
    cor_data = mm.load_data(
        'multivariate_analysis/pearson/pearson_cor_estimates/cor_knn5_pearson_10_DJI30_2000_2001.pkl')

    result = generate_random_points_on_hyperellipsoid(vol_data=vol_data, cor_data=cor_data)
    print(result)

    #mm.save_data('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl', result)
    #mm.transform_pickle_to_csv('multivariate_analysis/VaR/var_dcc_mvnorm_1994_1995_nsample_1e6.pkl')
    mm.save_data('multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl', result)
    mm.transform_pickle_to_csv('multivariate_analysis/VaR/var_knn5_pearson_garch_2000_2001_nsample_1e5_sqrt_chi2.pkl')
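A small self-contained check of the unit-hypersphere step used above; the sizes are arbitrary and only illustrate that normalising each column of a Gaussian sample places it on the unit sphere.

import numpy as np

r = np.random.randn(30, 1000)        # 30-dimensional Gaussian draws, one per column
u = r / np.linalg.norm(r, axis=0)    # scale each column to unit length
assert np.allclose(np.linalg.norm(u, axis=0), 1.0)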
def preProcessCompany(company, start, end):
    PP = PreProcessor(company, start=start, end=end)
    PP.csv_indices()
targetNetUpdate = 10000  # Number of steps between updating the target network to the value network

#-----------------------------------------------------------------------
# Trial - Run through a number of episodes, save data and record results
#-----------------------------------------------------------------------

# Trial scope variables
trialSteps = 0  # Total number of steps taken in the current trial
epsilon = epsilonMax  # Current epsilon value

# Initialise the environment
env = gym.make(GAME_NAME + 'Deterministic-v4')

# Prepares each frame to produce a viable network input
preProcessor = PreProcessor()

# Stores experiences as a tuple of [s, a, r, s']
memory = ExperienceReplay(replaySize)

# Records results and stores them in a csv file
resultsRec = ResultsRecorder(GAME_NAME)

# Q Value approximation and target networks, set with as many output nodes as viable actions
valueNetwork = DeepQnetwork(len(env.unwrapped.get_action_meanings()), alpha)
targetNetwork = DeepQnetwork(len(env.unwrapped.get_action_meanings()), alpha)

init = tf.global_variables_initializer()

# Create tensorflow model saver
saver = tf.train.Saver()
from PreProcessor import PreProcessor
from RParser import RParser

__author__ = 'Shaun Rong'
__version__ = '0.1'
__maintainer__ = 'Shaun Rong'
__email__ = '*****@*****.**'

with open('environ.yaml', 'r') as f:
    env = yaml.load(f)

stanford_parser_folder = env['stanford_parser_folder']
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

cfuf = PreProcessor()
with open('data/3.raw.txt', 'r') as f:
    text = f.read().splitlines()

process_text, sub_table = cfuf.process(text)
sen = process_text[3]
rp = RParser()
verb_parent, method_parent = rp.parse_v_method(sen)
print(verb_parent)
print(method_parent)
# glove2word2vec(glove_input_file="../wordVec/glove/glove.6B.50d.txt", word2vec_output_file="WordVectors/gensim_glove_wiki_vectors.txt")

# Load and save the word vectors and index map
model = gensim.models.KeyedVectors.load_word2vec_format("WordVectors/gensim_glove_wiki_vectors.txt", binary=False)
wordVectors = model.syn0
wordsList = model.index2word
wordMap = {wordsList[i]: i for i in range(len(wordsList))}
np.save("IMDBSA/wordMap", wordMap)
np.save("IMDBSA/wordVectors", wordVectors)

# Find the training data
positiveFiles = ['./Data/IMDBData/train/pos/' + f for f in os.listdir('./Data/IMDBData/train/pos/')
                 if os.path.isfile(os.path.join('./Data/IMDBData/train/pos/', f))]
negativeFiles = ['./Data/IMDBData/train/neg/' + f for f in os.listdir('./Data/IMDBData/train/neg/')
                 if os.path.isfile(os.path.join('./Data/IMDBData/train/neg/', f))]

# Initialize the pre-processor and sentiment analyzer
processor = PreProcessor()
analyzer = SentimentAnalyzer(MAX_SEQUENCE_LENGTH, BATCH_SIZE, LSTM_UNITS, LEARNING_RATE, wordMap, wordVectors)

# Load and process the training data
negativeSamples = []
positiveSamples = []
for pf in positiveFiles:
    with open(pf, "r", encoding="utf8") as f:
        lines = f.readlines()
        positiveSamples.extend(processor.cleanTextList(lines))
        print("Cleaned positive document: " + pf)
for nf in negativeFiles:
    with open(nf, "r", encoding="utf8") as f:
        lines = f.readlines()
        negativeSamples.extend(processor.cleanTextList(lines))
freq_synchronous_threshold = 0.1

# Threshold parameters used during lexical analysis.
tokenization_bit_distance: float = 0.2
tokenize_padding: bool = True

# Threshold parameters used during semantic analysis
subset_selection_size: float = 0.25
fuzzy_labeling: bool = True
min_correlation_threshold: float = 0.85

# A timer class to record timings throughout the pipeline.
a_timer = PipelineTimer(verbose=True)

# DATA IMPORT AND PRE-PROCESSING #
pre_processor = PreProcessor(can_data_filename, pickle_arb_id_filename, pickle_j1979_filename)
id_dictionary, j1979_dictionary = pre_processor.generate_arb_id_dictionary(
    a_timer,
    tang_normalize_strategy,
    time_conversion,
    freq_analysis_accuracy,
    freq_synchronous_threshold,
    force_pre_processing)
if j1979_dictionary:
    plot_j1979(a_timer, j1979_dictionary, force_j1979_plotting)

# LEXICAL ANALYSIS #
print("\n\t\t\t##### BEGINNING LEXICAL ANALYSIS #####")
tokenize_dictionary(a_timer,
                    id_dictionary,
                    force_lexical_analysis,
                    include_padding=tokenize_padding,
                    merge=True,
                    max_distance=tokenization_bit_distance)
signal_dictionary = generate_signals(a_timer, id_dictionary,
                         features, 'tfidf_char', 'test')

        # Cross Validation predictions
        self.check_model(classifier, xcross_tfidf, self.y_cross, model_name,
                         features, 'tfidf_char', 'cross')

    def get_and_print_all_scores(self):
        print('Running for count_vectors')
        for i in range(500, 5000, 500):
            self.count_vectors(i)
            self.tfidf_words(i)
            self.tfidf_ngram(i)
            self.tfidf_char(i)


imp = Importer()
trainDF = imp.ImportFuncs.read_csv_into_dataframe(
    'csv_classification/Multi-class/classified_sentences_all.csv')
prePro = PreProcessor()
trainDF = prePro.clean_dataframe_for_training(trainDF)
print(trainDF.head())

a = MultiClassifier(trainDF)
a.get_and_print_all_scores()
print(a.all_scores)

exp = Exporter()
exp.create_csv_scores(a.all_scores, 'all_scores_cleaned')
import gensim
from Indexer import Index
from PreProcessor import PreProcessor
from QueryProcessor import QueryProcessor
from tkinter import *
from tkinter.messagebox import *

prScores = None
index = None
dataset = None
stemmer = PorterStemmer()
N = 0

print("Initializing PreProcessor...")
preprocessor = PreProcessor()

try:
    with open('pgrankscores', 'rb') as inf:
        prScores = pickle.load(inf)
except:
    print("Please place the 'pgrankscores' pickle file in the same dir as this script or run runCrawler.py first to calculate the pagerank scores...")
    exit()

try:
    with open('index', 'rb') as indf:
        index = pickle.load(indf)
        N = index.N
except:
    print("Please place the 'index' pickle file in the same dir as this script...")
__author__ = 'Shaun Rong'
__version__ = '0.1'
__maintainer__ = 'Shaun Rong'
__email__ = '*****@*****.**'

with open('../environ.yaml', 'r') as f:
    env = yaml.load(f)

stanford_parser_folder = env['stanford_parser_folder']
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

cfuf = PreProcessor()
with open('../data/verb_method_arg/test/paper0105.raw.txt', 'r') as f:
    text = f.read().splitlines()

orig_text = text[4]
process_text, sub_table = cfuf.process([orig_text.strip()])
sen = process_text[0]
print(sen)

parser = stanford.StanfordParser(model_path=env['model_path'])
sentences = parser.raw_parse(sen)
ROOT = 'ROOT'
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

tree_parser = stanford.StanfordParser(model_path=env['model_path'])

for ff in train_file:
    with open(os.path.join(args.f, "{}.raw.txt".format(ff)), 'r') as f:
        text = f.read().splitlines()
    with open(os.path.join(args.f, "{}.gold.yaml".format(ff)), 'r') as f:
        gold_ticket = yaml.load(f)
    for i, orig_text in enumerate(text):
        rp = RParser()
        # Preprocess
        pre_p = PreProcessor()
        process_text, sub_table = pre_p.process([orig_text.strip()])
        sen = process_text[0]
        # RParse
        NPs = rp.return_NPs(sen)
        gold_NPs = extract_gold_NPs(gold_ticket['sen{}'.format(i + 1)], sub_table, tree_parser)
        for NP in NPs:
            if NP in gold_NPs:
                train_summary['input_output'].append(repr(NP))
            else:
                train_summary['else'].append(repr(NP))

with open('ioput_train_db.yaml', 'w') as f:
    f.write(yaml.dump(train_summary, default_flow_style=False))
from PreProcessor import PreProcessor
from Processor_Clustering import Processor_Clustering
from postprocess_lidar import postprocess_lidar

scaling = 100

# define preprocessor to get lidar data
prep = PreProcessor(scaling)
#prep.sendUsingDust()
#prep.receiveUsingDust()

# get range and data
ranges_data = prep.processCSV('/home/thomas/Github/Object_Detection/lidar/lidar_data/2020-12-03-17-26-18/scan.csv')

# convert to euclidean notation
ranges_data_euclidean = prep.convertToEuclidean(ranges_data)

# get the array to apply clustering
ranges_data3 = prep.prepareArray(ranges_data_euclidean)

# apply the clustering algorithm
clustering = Processor_Clustering(ranges_data3, scaling)
class Shadows(GlobalGameData, TkWorldDataShared): # Shadow table size shadow_map_size = 0, 0 # Shadow world size shadow_world_size = 0, 0 # 2D map of each cell shadow_map = [] def __init__(self): self.s_shadow_surf = self.tk_surface(self.tk_resolution) if self.tk_shadow_quality: self.s_shadow_surf.set_colorkey(self.tk_shadow_mask_color) self.s_buildShadowDirMap() def s_buildShadowDirMap(self): """ TBD """ self.s_shadow_dir_map = [] # Every cell has one line to work as guides for the shadow quadrilateral # Line meaning align the line clockwise diagonally to cast the shadows # Finetune these to workout the line inside the wall segment (Doesn't peek out from the wall) # These are offsets from topleft of the wall axis_dir = { 0: [0, 31, 31, 31], # Top 90: [31, 32, 31, 0], # Left 180: [32, 1, 0, 1], # Down 270: [1, 0, 1, 32] } # Right non_axis_dir = { 45: [0, 32, 32, 0], # TopLeft 315: [0, 0, 32, 32], # TopRight 135: [32, 32, 0, 0], # DownLeft 225: [32, 0, 0, 32] } # DownRight # Find the 2d array mid point x_mid_value = range(sum(self.tk_shadow_minmax_x)) x_mid_value = x_mid_value[len(x_mid_value) / 2] y_mid_value = range(sum(self.tk_shadow_minmax_y)) y_mid_value = y_mid_value[len(y_mid_value) / 2] y_range_slide = self.tk_chain(xrange(1, y_mid_value + 1), xrange(y_mid_value + 1, 0, -1)) for e1, y in enumerate(xrange(sum(self.tk_shadow_minmax_y))): y_range_value = y_range_slide.next() x_range_slide = self.tk_chain(xrange(1, x_mid_value + 1), xrange(x_mid_value + 1, 0, -1)) row = [] for e2, x in enumerate(xrange(sum(self.tk_shadow_minmax_x))): x_range_value = x_range_slide.next() # Get Angle to each cell from the middle for shadow cast line _dir = int( self.tk_degrees( self.tk_atan2(x_mid_value - e2, y_mid_value - e1) % (self.tk_pi * 2))) if e1 == y_mid_value and e2 == x_mid_value: # Ignore mid value d = [0, 0, 0, 0] elif _dir in axis_dir: # Axis aligned direction d = axis_dir[_dir][:] else: # Find the closest non-axis angle non_axis = min(non_axis_dir.keys(), key=lambda x: abs(x - _dir)) d = non_axis_dir[non_axis][:] d.append(min(y_range_value, x_range_value)) # Fixes peeking by lowering the extra angle per wall # as the distance increases dist = self.tk_hypot(x_mid_value - e2, y_mid_value - e1) d.append(max(0, 0.16 - 0.01 * dist)) row.append(tuple(d)) self.s_shadow_dir_map.append(tuple(row)) self.s_shadow_dir_map = tuple(self.s_shadow_dir_map) def s_loadSurfaceMap(self, surface): """ Load the ground layer as shadow mask surfaces -> All macro surfaces return -> None """ fade_surface = self.tk_surface(surface.get_size(), self.tk_srcalpha) fade_surface.fill(self.tk_shadow_color) self.s_fade_surf = surface.copy() self.s_fade_surf.blit(fade_surface, (0, 0)) def s_loadCellWalls(self, cellwalls): """ Load world map and convert all cells to 2d binary map (Walls=1 else 0) cellwalls -> 2d world map return -> None """ final = [] # Per-cell size self.shadow_map_size = len(cellwalls[0]), len(cellwalls) # Per-sector size self.shadow_world_size = self.shadow_map_size[ 0] * 32, self.shadow_map_size[1] * 32 # Cells with collisions are marked with 1 else 0 for y in xrange(self.shadow_map_size[1]): row = [] for x in xrange(self.shadow_map_size[0]): row.append(1 if cellwalls[y][x].w_collision else 0) final.append(tuple(row)) self.shadow_map[:] = final # Note: Remove this exec( PreProcessor.parseCode( """ def s_applyShadows(self, surface): x, y = self.w_share["WorldPosition"] ofsx = x - self.w_share['ShadowOffset'][0] ofsy = y - self.w_share['ShadowOffset'][1] #-ifdef/tk_shadow_quality 
self.s_shadow_surf.blit(surface, (0, 0)) self.s_shadow_surf.lock() #-endif rounded_x, rounded_y = int(x), int(y) ori_x, ori_y = (self.tk_res_half[0] + rounded_x, self.tk_res_half[1] + rounded_y) # Get index of which cell the player is standing on near_x, near_y = -int(x - 16) >> 5, -int(y - 16) >> 5 min_y, max_y = self.tk_shadow_minmax_y min_x, max_x = self.tk_shadow_minmax_x # Shadows are calculated clockwise for e1, ry in enumerate(xrange(near_y - min_y, near_y + max_y)): if ry < 1 or ry > self.shadow_map_size[1]-2: # Most outer rows do not cast shadows (Walls between playable area and the void) continue for e2, rx in enumerate(xrange(near_x - min_x, near_x + max_x)): if rx < 1 or rx > self.shadow_map_size[0]-2: # Most outer columns do not cast shadows continue # Check if the cell can cast shadows if self.shadow_map[ry][rx]: # Get the line endpoints pc = self.s_shadow_dir_map[e1][e2] # Position of the object(TopLeft) sox = (ori_x + 32 * rx - 17) - ofsx soy = (ori_y + 32 * ry - 17) - ofsy # How far the shadow is casted from the wall/object length = 80 * pc[4] ep1 = sox + pc[0], soy + pc[1] # Endpoint 1 # Calculate the angle to endpoints of the cubes angle_1 = self.tk_atan2(ori_x - (ep1[0] + x), ori_y - (ep1[1] + y)) + pc[5] end_p_1 = (ep1[0] - self.tk_sin(angle_1) * length, ep1[1] - self.tk_cos(angle_1) * length) ep2 = sox + pc[2], soy + pc[3] # Endpoint 2 angle_2 = self.tk_atan2(ori_x - (ep2[0] + x), ori_y - (ep2[1] + y)) - pc[5] end_p_2 = (ep2[0] - self.tk_sin(angle_2) * length, ep2[1] - self.tk_cos(angle_2) * length) # Cast a shadow polygon from the line and color it for colorkeying self.tk_draw_polygon({surface}, self.tk_shadow_mask_color, (ep1, end_p_1, end_p_2, ep2)) #-ifdef/tk_shadow_quality self.s_shadow_surf.unlock() mapPos = -x, -y # The shadow map is the chosen layer 'shadowed' and stored in memory. # This section cuts a correct size of that map and displays it # If the topleft map corner is in view, clamp it from going below 0 topLeft = max(0, mapPos[0] - self.tk_res_half[0] + 16), max(0, mapPos[1] - self.tk_res_half[1] + 16) bottomRight = (min(self.shadow_world_size[0] - topLeft[0], self.tk_res_half[0] + mapPos[0] + 16), min(self.shadow_world_size[1] - topLeft[1], self.tk_res_half[1] + mapPos[1] + 16)) # Area which will be cut from the shadow map area = (topLeft[0] + (ofsx if topLeft[0] else 0), topLeft[1] + (ofsy if topLeft[1] else 0), bottomRight[0] - (ofsx if topLeft[0] else -1 - ofsx), bottomRight[1] - (ofsy if topLeft[1] else -1 - ofsy)) # The topleft anchor point should always be at topleft corner of the screen # But when the topleft of the map is in view, it should anchor to that one dest = (self.tk_res_half[0] + x - 16 + topLeft[0] - (ofsx if not topLeft[0] else 1), self.tk_res_half[1] + y - 16 + topLeft[1] - (ofsy if not topLeft[1] else 1)) # Blit the shadowed part of the ground surface.blit(self.s_fade_surf, dest, area=area) # Blit the visible area surface.blit(self.s_shadow_surf, (0, 0)) #-endif """.format(surface='self.s_shadow_surf' if GlobalGameData. tk_shadow_quality else 'surface'), tk_shadow_quality=GlobalGameData.tk_shadow_quality))
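The snippet above relies on PreProcessor.parseCode stripping or keeping the "#-ifdef/<flag>" ... "#-endif" blocks before the string is exec'd. Its implementation is not shown in these examples, so the following is only a guessed minimal sketch of such a directive preprocessor.

def parse_code_sketch(code, **flags):
    # Hypothetical sketch, not the project's parseCode: keep lines inside an
    # "#-ifdef/<flag>" block only when the corresponding keyword flag is truthy.
    out, keep = [], True
    for line in code.splitlines():
        stripped = line.strip()
        if stripped.startswith("#-ifdef/"):
            keep = bool(flags.get(stripped.split("/", 1)[1], False))
        elif stripped == "#-endif":
            keep = True
        elif keep:
            out.append(line)
    return "\n".join(out)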
df = df.append(
    {
        "train_or_test": path[1],
        "review_type": path[2],
        "sentence": f.read(),
        "review_number": detail[1],
        "review_id": detail[0],
    },
    ignore_index=True,
)

df.to_csv(output_name, index=False)

imdb_raw_df = pd.read_csv(dataset_path.joinpath("imdb_raw.csv"))

"""
list of words that are common to both datasets
> we can play with which word to remove and see the performance of the model
br is a html tag
"""
common_words = [
    "br",
    # 'film',
    # 'movie',
    # 'one',
    # 'like',
    # 'good',
    # 'time'
]

imdb_processor = PreProcessor(imdb_raw_df, common_words, "imdb")
imdb_processor.process()
        "target": twenty_news_group_train.target,
        "train_or_test": "train",
        "sentence": twenty_news_group_train.data
    })

twenty_news_test_df = pd.DataFrame(
    data={
        "target": twenty_news_group_test.target,
        "train_or_test": "test",
        "sentence": twenty_news_group_test.data
    })

twenty_news_combined_df = twenty_news_train_df.append(twenty_news_test_df)
twenty_news_combined_df["sentence"] = twenty_news_combined_df["sentence"].apply(
    lambda x: x.replace("\n", " ").replace("\r", "").replace("\t", " ").strip())
twenty_news_combined_df.reset_index(inplace=True)
twenty_news_combined_df.rename(columns={"index": "id"}, inplace=True)
twenty_news_combined_df.to_csv(dataset_path.joinpath("twenty_news_raw.csv"), sep="\t", index=False)

twenty_news_raw_df = pd.read_csv(dataset_path.joinpath("twenty_news_raw.csv"), sep="\t")

common_words = [
    # TODO: to be determined
]
twenty_news_processor = PreProcessor(twenty_news_raw_df, common_words, "twenty_news")
twenty_news_processor.process()
from PreProcessor import PreProcessor

__author__ = 'Shaun Rong'
__version__ = '0.1'
__maintainer__ = 'Shaun Rong'
__email__ = '*****@*****.**'

with open('../environ.yaml', 'r') as f:
    env = yaml.load(f)

stanford_parser_folder = env['stanford_parser_folder']
os.environ['STANFORD_PARSER'] = stanford_parser_folder
os.environ['STANFORD_MODELS'] = stanford_parser_folder

cfuf = PreProcessor()
with open('../data/train/2.raw.txt', 'r') as f:
    text = f.read().splitlines()

orig_text = text[5]
process_text, sub_table = cfuf.process([orig_text.strip()])
sen = process_text[0]

parser = stanford.StanfordDependencyParser(model_path=env['model_path'])
sentences = parser.raw_parse(sen)

for parse in sentences:
    for t in parse.triples():
        print(t)
"""
def executeSinglePlSqlFile(data, spec): f = open(data, 'r') linesOfCode = len(f.readlines()) f.close() processor = PreProcessor(spec, data) tableInfo, assumeConstraintList, assertConstraintList, resultString = processor.start( ) file = open('wpc/upper_input.sql', "w") file.write(resultString) file.close() # recording startTime startTime1 = datetime.datetime.now() input = FileStream('wpc/upper_input.sql') lexer = PlSqlLexer(input) stream = CommonTokenStream(lexer) parser = PlSqlParser(stream) tree = parser.sql_script() cfg = MyCFG() helper = MyHelper(parser) helper.updateTableDict(tableInfo) utility = MyUtility(helper) v = MyVisitor(parser, cfg, utility) v.visit(tree) print("\nRaw CFG :", v.rawCFG, "\n") # for key in v.cfg.nodes: # if v.cfg.nodes[key].ctx != None: # print(key, " --> ", v.cfg.nodes[key].ctx.getText()) # print("\n") res = MyRawCfgToGraph(v.rawCFG, cfg) res.execute() # cfg.printPretty() # print("\n") utility.generateVariableSet(cfg) # all properties of each node # for nodeId in cfg.nodes: # cfg.nodes[nodeId].printPretty() ssaString = MySsaStringGenerator(cfg, parser) ssaString.execute( ) # only for generating DOT file for "before_versioning_graph" # recording finishTime finishTime1 = datetime.datetime.now() # cfg.dotToPng(cfg.dotGraph, "wpc/raw_graph") # # hello1 = utility.generateBeforeVersioningDotFile(cfg) # cfg.dotToPng(hello1, "wpc/before_versioning_graph") # recording startTime startTime2 = datetime.datetime.now() algo = WpcGenerator(cfg, helper, ssaString) algo.execute() algo.finalWpcString = algo.finalWpcString.replace(" ", " ") # done: replace " = " with " == " in algo.finalWpcString algo.finalWpcString = algo.finalWpcString.replace(" = ", " == ") print("\n**** Final WPC VC in Well_Bracketted_Format:\n\n", algo.finalWpcString, "\n") # print(algo.variablesForZ3) # algo.finalWpcString = "( ( z ) ^ ( ( ! ( y ) ) ==> ( ( ( 2 ) v ( x ) ) ==> ( y - 2 ) ) ) )" # for testing! Don't UNCOMMENT... # algo.finalWpcString = "( ( ( z ) ==> ( u ) ) ^ ( ( ! ( y ) ) ==> ( ( ( true ) ) ==> ( y - 2 ) ) ) )" # for testing! Don't UNCOMMENT... # algo.finalWpcString = "( ( ( z ) ==> ( u ) ) ^ ( ( ! ( y ) ) ==> ( true ) ) ^ ( ( a ) ==> ( b ) ) )" # for testing! Don't UNCOMMENT... # algo.finalWpcString = "( ( ( ! ( y ) ) ==> ( true ) ) )" # for testing! Don't UNCOMMENT... # algo.finalWpcString = "( ( ( ! ( y ) ) ^ ( true ) v ( g ) ) )" # for testing! Don't UNCOMMENT... z3StringConvertor = WpcStringConverter(algo.finalWpcString) z3StringConvertor.execute() # z3StringConvertor.convertedWpc is the FINAL VC Generated... 
print("\n**** Final WPC VC in Z3 Format:\n\n", z3StringConvertor.convertedWpc, "\n") z3FileString = "# This file was generated at runtime on " + str( datetime.datetime.now()) + "\n" z3FileString = z3FileString + "from z3 import *\n\n" z3FileString = z3FileString + "class Z3RuntimeWpcFile():\n" z3FileString = z3FileString + "\t" + "def __init__(self):\n" z3FileString = z3FileString + "\t\t" + "self.finalFormula = \"\"\n" z3FileString = z3FileString + "\t\t" + "self.satisfiability = \"\"\n" z3FileString = z3FileString + "\t\t" + "self.modelForViolation = \"\"\n\n" z3FileString = z3FileString + "\t" + "def execute(self):\n" for i in algo.variablesForZ3: z3FileString = z3FileString + "\t\t" + i + " = Real(\'" + i + "\')\n" z3FileString = z3FileString + "\n\t\ts = Solver()\n" if len(z3StringConvertor.implies_p) > 0: for i in range(len(z3StringConvertor.implies_p)): z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.implies_p[ i] + ")\n" if not z3StringConvertor.convertedWpc == z3StringConvertor.implies_p_q[ i]: z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.implies_p_q[ i] + ")\n" z3FileString = z3FileString + "\t\t" + "s.add( Not( " + z3StringConvertor.convertedWpc + " ) )\n" # z3FileString = z3FileString + "\n\t\t" + "print()" z3FileString = z3FileString + "\n\t\t" + "#print(\"\\n%%%%%%%%%% Aggregate Formula %%%%%%%%%%\\n\", s)" z3FileString = z3FileString + "\n\t\t" + "self.finalFormula = str(s)" # z3FileString = z3FileString + "\n\t\t" + "print()" z3FileString = z3FileString + "\n\t\t" + "#print(\"\\n%%%%%%%%%% Satisfiability %%%%%%%%%%\")\n" z3FileString = z3FileString + "\n\t\t" + "self.satisfiability = str(s.check())" z3FileString = z3FileString + "\n\t\t" + "if self.satisfiability == \"sat\":" # z3FileString = z3FileString + "\n\t\t\t" + "print()" z3FileString = z3FileString + "\n\t\t\t" + "#print(\"\\n-------->> Violation Occurred...\")" z3FileString = z3FileString + "\n\t\t\t" + "self.satisfiability = \"violation\"" # z3FileString = z3FileString + "\n\t\t\t" + "print()" z3FileString = z3FileString + "\n\t\t\t" + "#print(\"\\n%%%%%%%%%% An Instance for which Violation Occurred %%%%%%%%%%\\n\", s.model())" z3FileString = z3FileString + "\n\t\t\t" + "self.modelForViolation = str(s.model())" z3FileString = z3FileString + "\n\t\t" + "elif self.satisfiability == \"unsat\":" # z3FileString = z3FileString + "\n\t\t\t" + "print()" z3FileString = z3FileString + "\n\t\t\t" + "#print(\"\\n-------->> NO Violation Detected so far...\\n\")" z3FileString = z3FileString + "\n\t\t\t" + "self.satisfiability = \"sat\"" # z3FileString = z3FileString + "\n\t\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t" + "print()\n" file = open('wpc/Z3RuntimeWpcFile.py', "w") file.write(z3FileString) file.close() # time.sleep(2) # import file created on Runtime... import wpc.Z3RuntimeWpcFile from wpc.Z3RuntimeWpcFile import Z3RuntimeWpcFile # Reload after module's creation to avoid old module remain imported from disk...VVI... wpc.Z3RuntimeWpcFile = reload(wpc.Z3RuntimeWpcFile) z3Runtime = Z3RuntimeWpcFile() z3Runtime.execute() # print(z3Runtime.finalFormula) # print(z3Runtime.satisfiability) # print(z3Runtime.modelForViolation) # recording finishTime finishTime2 = datetime.datetime.now() timeDifference = ((finishTime1 - startTime1) + (finishTime2 - startTime2)).total_seconds() return linesOfCode, timeDifference, z3StringConvertor.convertedWpc, z3Runtime.satisfiability, z3Runtime.modelForViolation
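For orientation, the string building above writes out a module shaped roughly like the sketch below; the variable names and the formula are placeholders, not output from a real run.

# Illustrative shape of the generated wpc/Z3RuntimeWpcFile.py (placeholders only).
from z3 import *

class Z3RuntimeWpcFile():
    def __init__(self):
        self.finalFormula = ""
        self.satisfiability = ""
        self.modelForViolation = ""

    def execute(self):
        x = Real('x')        # one Real per entry of algo.variablesForZ3
        y = Real('y')

        s = Solver()
        s.add(Not(x > y))    # negation of the generated verification condition
        self.finalFormula = str(s)
        self.satisfiability = str(s.check())
        if self.satisfiability == "sat":
            self.satisfiability = "violation"
            self.modelForViolation = str(s.model())
        elif self.satisfiability == "unsat":
            self.satisfiability = "sat"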
def generate(self):
    if self._dataSet == None:
        # Create outdirs
        if not os.path.exists(Generator.DIGITS_OUTDIR):
            os.makedirs(Generator.DIGITS_OUTDIR)
        if not os.path.exists(Generator.TRAINSET_OUTDIR):
            os.makedirs(Generator.TRAINSET_OUTDIR)

        # Treat all digit images found in source dir
        files = Generator.listFiles(Generator.DIGITS_SRCDIR)
        outDF = pd.DataFrame(data=[], columns=self._dataColumns + self._classColumns)
        for imgFile in files:
            outDF1Nb = pd.DataFrame(data=[], columns=self._dataColumns + self._classColumns)
            number = Generator.extractNumber(imgFile)
            img = Utils.readImage(Generator.DIGITS_SRCDIR + "/" + imgFile)

            # double image canvas and center digit
            transform = AffineTransform(scale=(2, 2), translation=(-img.shape[1] / 2, -img.shape[0] / 2))
            img = warp(img, transform, output_shape=img.shape, mode='edge')
            #imgSize = img.shape

            # Apply the modifications/augmentations on the digit image
            for tilt in Generator.tiltRange:
                tiltTform = AffineTransform(shear=tilt, translation=(215 * tilt / 4, 0))
                for angle in Generator.angleRange:
                    for xtransl in Generator.xTranslRange:
                        for ytransl in Generator.yTranslRange:
                            transTform = AffineTransform(translation=(xtransl, ytransl))
                            for sigma in Generator.blurRange:
                                for ratio in Generator.zoomRange:
                                    #print(np.mean(img))
                                    zoomTform = AffineTransform(
                                        scale=(ratio, ratio),
                                        translation=(img.shape[1] / ratio / 3 - img.shape[1] / 3,
                                                     img.shape[0] / ratio / 3 - img.shape[0] / 3))
                                    tilted = warp(img, tiltTform, output_shape=img.shape, mode='edge')
                                    #print(np.mean(tilted))
                                    rotated = rotate(tilted, angle, mode='edge')
                                    #print(np.mean(rotated))
                                    transtd = warp(rotated, transTform, output_shape=img.shape, mode='edge')
                                    #print(np.mean(transtd))
                                    blured = transtd.copy()
                                    blured[:, :, 0] = gaussian(blured[:, :, 0], sigma, preserve_range=True)
                                    #print(np.mean(blured))
                                    resized = -warp(blured, zoomTform, output_shape=img.shape, mode='edge')
                                    print("resized=" + str(np.mean(resized)))
                                    resized = -1 * resized  ## TODO: why does resize invert all the values?

                                    # Save the new image
                                    newFile = Generator.DIGITS_OUTDIR + "/" + str(number) + \
                                        "_t" + str(tilt) + "_r" + str(angle) + "_t" + str(xtransl) + "_" + str(ytransl) + \
                                        "_b" + str(sigma) + "_s" + str(ratio) + ".png"
                                    print("Writing image file: " + newFile)
                                    #Utils.showImage(resized)
                                    Utils.writeImage(resized, newFile)

                                    print("Treating file: " + newFile)
                                    attributes = pd.DataFrame(
                                        PreProcessor.preprocessImage(resized).reshape(
                                            1, PreProcessor.TRAIN_WIDTH * PreProcessor.TRAIN_HEIGHT))
                                    attributes["class"] = number
                                    # print(attributes)
                                    # print(type(attributes))
                                    # print(attributes.shape)
                                    attributes.columns = outDF1Nb.columns
                                    outDF1Nb = outDF1Nb.append(attributes)  # , ignore_index=True

            # Save the partial dataframe for the current digits (in case of crash, allows to restart only on untreated digits)
            outDF1Nb.to_csv(Generator.TRAINSET_OUTDIR + "/trainset_" + number + ".csv")
            outDF = outDF.append(outDF1Nb, ignore_index=True)

        # Save the whole dataframe for all digits
        outDF.to_csv(Generator.TRAINSET_OUTDIR + "/trainset_full.csv")
        self._dataSet = outDF
def executeSinglePlSqlFile(data, spec): f = open(data, 'r') linesOfCode = len(f.readlines()) f.close() processor = PreProcessor(spec, data) tableInfo, assumeConstraint, assertConstraint, resultString = processor.start( ) file = open('cnf/upper_input.sql', "w") file.write(resultString) file.close() # recording startTime startTime = datetime.datetime.now() input = FileStream('cnf/upper_input.sql') lexer = PlSqlLexer(input) stream = CommonTokenStream(lexer) parser = PlSqlParser(stream) tree = parser.sql_script() # ast = tree.toStringTree(recog=parser) # print(str(MyPlSqlVisitor(parser).getRuleName(tree))) # print("\n\n", signature(tree.toStringTree), "\n") cfg = MyCFG() helper = MyHelper(parser) helper.updateTableDict(tableInfo) utility = MyUtility(helper) v = MyVisitor(parser, cfg, utility) v.visit(tree) print("\nRaw CFG : ", v.rawCFG) # for key in v.cfg.nodes: # if v.cfg.nodes[key].ctx != None: # print(key, " --> ", v.cfg.nodes[key].ctx.getText()) res = MyRawCfgToGraph(v.rawCFG, cfg) res.execute() # cfg.printPretty() # cfg.dotToPng(cfg.dotGraph, "cnf/raw_graph") # TODO: make dot file in cnf form utility.generateDomSet(cfg) # print("Dominator set ended----------->\n\n") utility.generateSDomSet(cfg) # print("Strictly Dominator set ended ----------->\n\n") utility.generatIDom(cfg) # print("Immediate Dominator ended ----------->\n\n") utility.generateDFSet(cfg) utility.insertPhiNode(cfg) utility.initialiseVersinosedPhiNode(cfg) utility.versioniseVariable(cfg) utility.phiDestruction(cfg) ssaString = MySsaStringGenerator(cfg, parser) ssaString.execute() # utility.generateFinalDotGraph(cfg) # for nodeId in cfg.nodes: # cfg.nodes[nodeId].printPretty() # cfg.dotToPng(cfg.dotGraph, "cnf/raw_graph") # # hello1 = utility.generateBeforeVersioningDotFile(cfg) # cfg.dotToPng(hello1, "cnf/before_versioning_graph") # # hello4 = utility.generateDestructedPhiNodeWalaDotFile(cfg) # cfg.dotToPng(hello4, "cnf/destructed_phi_node_wala_graph") cnfUtility = CnfUtility(helper) iCnfCfg = cnfUtility.copyCfg(cfg) reverseCnfCfg = cnfUtility.topologicalSort(iCnfCfg) cnfUtility.unvisit(iCnfCfg) cnfUtility.setParentBranching(iCnfCfg) cnfCfg = cnfUtility.reverseDictOrder(reverseCnfCfg) cnfUtility.copyParentBranching(cnfCfg, iCnfCfg) # print("\n\n\n\n\n\t\t\tThe intermediate CNF form is ------------------------------>\n\n\n\n") # for nodeId in iCnfCfg.nodes: # iCnfCfg.nodes[nodeId].printPretty() # print("\n\n\n\n\n\t\t\tThe CNF form is ------------------------------>\n\n\n\n") cnfVcGenerator = CnfVcGenerator(cnfCfg, parser) cnfPath = [] for nodeId in cnfCfg.nodes: cnfPath.append(nodeId) cnfVcGenerator.generateCnfVc(cnfPath) # print("\n\n\n\n\t\t\tThe CNF VCs are : ------------------------------->\n\n\n") # print(cnfVcs) # for nodeId in cnfCfg.nodes: # cnfCfg.nodes[nodeId].printPretty() # cnfVc = cnfUtility.cnfVc(cnfCfg) # # print("\n\n\t\tThe CNF VCs are ----------------->\n\n\n") # # for str in cnfVc: # print(str) varSet, z3Str = cnfUtility.iZ3format(cnfCfg) # print("\n\n*******************\n\n", z3Str, "\n\n--------------\n\n") # print(varSet) # # print("\n\n") z3Str = z3Str.replace(" ", " ") z3Str = z3Str.replace(" == ", " = ") z3Str = z3Str.replace(" = ", " == ") print("\n**** Final CNF VC in Well_Bracketted_Format:\n\n", z3Str, "\n") z3StringConvertor = WpcStringConverter(z3Str) z3StringConvertor.execute() # print("\n**** Final CNF VC in Z3 Format:\n", z3StringConvertor.convertedWpc, "\n") z3FileString = "# This file was generated at runtime on " + str( datetime.datetime.now()) + "\n" z3FileString = z3FileString + "from 
z3 import *\n\n" z3FileString = z3FileString + "class Z3RuntimeCnfFile():\n" z3FileString = z3FileString + "\t" + "def __init__(self):\n" z3FileString = z3FileString + "\t\t" + "self.finalFormula = \"\"\n" z3FileString = z3FileString + "\t\t" + "self.satisfiability = \"\"\n" z3FileString = z3FileString + "\t\t" + "self.modelForViolation = \"\"\n\n" z3FileString = z3FileString + "\t" + "def execute(self):\n" for i in varSet: z3FileString = z3FileString + "\t\t" + i + " = Real(\'" + i + "\')\n" z3FileString = z3FileString + "\n\t\ts = Solver()\n" if len(z3StringConvertor.implies_p) > 0: for i in range(len(z3StringConvertor.implies_p)): z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.implies_p[ i] + ")\n" if not z3StringConvertor.convertedWpc == z3StringConvertor.implies_p_q[ i]: z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.implies_p_q[ i] + ")\n" # if z3StringConvertor.convertedWpc not in z3StringConvertor.implies_p_q: # z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.convertedWpc + ")\n" # else: # z3FileString = z3FileString + "\t\t" + "s.add(" + z3StringConvertor.convertedWpc + ")\n" z3FileString = z3FileString + "\t\t" + "s.add( Not( " + z3StringConvertor.convertedWpc + " ) )\n" # z3FileString = z3FileString + "\n\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t" + "print(\"%%%%%%%%%% Aggregate Formula %%%%%%%%%%\\n\", s)" z3FileString = z3FileString + "\n\t\t" + "self.finalFormula = str(s)" # z3FileString = z3FileString + "\n\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t" + "print(\"%%%%%%%%%% Satisfiability %%%%%%%%%%\")\n" z3FileString = z3FileString + "\n\t\t" + "self.satisfiability = str(s.check())" z3FileString = z3FileString + "\n\t\t" + "if self.satisfiability == \"sat\":" # z3FileString = z3FileString + "\n\t\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t\t" + "print(\"-------->> Violation Occurred...\")" z3FileString = z3FileString + "\n\t\t\t" + "self.satisfiability = \"violation\"" # z3FileString = z3FileString + "\n\t\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t\t" + "print(\"%%%%%%%%%% An Instance for which Violation Occurred %%%%%%%%%%\\n\", s.model())" z3FileString = z3FileString + "\n\t\t\t" + "self.modelForViolation = str(s.model())" z3FileString = z3FileString + "\n\t\t" + "elif self.satisfiability == \"unsat\":" # z3FileString = z3FileString + "\n\t\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t\t" + "print(\"-------->> NO Violation Detected so far...\")" z3FileString = z3FileString + "\n\t\t\t" + "self.satisfiability = \"sat\"" # z3FileString = z3FileString + "\n\t\t\t" + "print()" # z3FileString = z3FileString + "\n\t\t" + "print()\n" file = open('cnf/Z3RuntimeCnfFile.py', "w") file.write(z3FileString) file.close() import cnf.Z3RuntimeCnfFile from cnf.Z3RuntimeCnfFile import Z3RuntimeCnfFile # Reload after module's creation to avoid old module remain imported from disk...VVI... cnf.Z3RuntimeCnfFile = reload(cnf.Z3RuntimeCnfFile) z3Runtime = Z3RuntimeCnfFile() z3Runtime.execute() finishTime = datetime.datetime.now() timeDifference = (finishTime - startTime).total_seconds() return linesOfCode, timeDifference, z3StringConvertor.convertedWpc, z3Runtime.satisfiability, z3Runtime.modelForViolation
from TwitterSentimentAnalyzer import TwitterSentimentAnalyzer
from PreProcessor import PreProcessor

er = TwitterSentimentAnalyzer()
processor = PreProcessor()

tmp = "RT @Cj_Walker1: My family aand I have came to a great decision! With that being said I would like to say I have committed to The Ohio State"

clean = processor.preProcess(tmp, lowercase=True)
cleanString = ""
for s in clean:
    cleanString = cleanString + s + " "

cleanStringList = [cleanString]
for i in range(39):
    cleanStringList.append(" ")

print(cleanString)
print(processor.cleanText(tmp))

result = er.Evaluate(cleanStringList)
print(result)
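As a side note, the manual concatenation loop above can be written more idiomatically with str.join; the extra trailing space reproduces the loop's output exactly.

cleanString = " ".join(clean) + " "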
cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
cv2.namedWindow('threshold', cv2.WINDOW_NORMAL)
cv2.resizeWindow('frame', 600, 600)
cv2.resizeWindow('threshold', 600, 600)

while True:
    # Capture frame-by-frame
    if set == 0:
        ret, frame = cap.read()
    else:
        url = "http://192.168.1.95:8080/shot.jpg"
        imgResp = requests.get(url)
        imgArr = np.array(bytearray(imgResp.content), dtype=np.uint8)
        frame = cv2.imdecode(imgArr, -1)

    # copy the output
    output = frame
    thresh = PreProcessor.preProcessImage(frame)

    # draw only rectangle-alike contours whose perimeter is bigger than 'some value'
    cardImg = CardProcessor.findCards(thresh, toDraw=output)
    if len(cardImg) > 0:
        print(cardImg)
        cv2.imshow("Show Boxes", cardImg)

    # Display the resulting frame
    cv2.imshow('frame', output)
    cv2.imshow('threshold', thresh)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# When everything done, release the capture
if set == 0:
#Utils.showImage(transtd)

blurred = transtd.copy()
blurred[:, :, 0] = gaussian(blurred[:, :, 0], sigma, preserve_range=True)
print("blurred=" + str(np.mean(blurred[:, :, 0:3])))
#Utils.showImage(blurred)

resized = resize(blurred, (ratio * blurred.shape[0], ratio * blurred.shape[1]),
                 anti_aliasing=True, preserve_range=True)
print("resized=" + str(np.mean(resized[:, :, 0:3])))
#Utils.showImage(resized)

print("Treating file: " + newFile)
attributes = pd.DataFrame(
    PreProcessor.preprocessImage(resized).reshape(1, TRAIN_WIDTH * TRAIN_HEIGHT))
attributes["class"] = number
print(type(attributes))
print(attributes.shape)
print(attributes)

###################

from DigitPositionDetector import DigitPositionDetector

dpd = DigitPositionDetector(resized)
dpd.detect()
detectedDigits = dpd.getDetectedDigits()
for dd in detectedDigits:
    dd.display()
    '201804120000', '201804110000', '201804100000', '201804090000',
    '201804060000', '201804050000', '201804040000', '201804030000',
    '201804020000', '201803290000', '201803280000', '201803270000',
    '201803260000'
]

day30End = [
    '201804250000', '201804240000', '201804210000', '201804200000',
    '201804190000', '201804180000', '201804170000', '201804140000',
    '201804130000', '201804120000', '201804110000', '201804100000',
    '201804070000', '201804060000', '201804050000', '201804040000',
    '201804030000', '201803300000', '201803290000', '201803280000',
    '201803270000'
]

# Get Sentiment Score
er = TwitterSentimentAnalyzer()
processor = PreProcessor()

result = []
for i in range(0, 21):
    processedTweets = []
    rule = gen_rule_payload("MSFT OR Microsoft",
                            from_date=day30Start[i],
                            to_date=day30End[i],
                            results_per_call=80)
    tweets = collect_results(rule, max_results=80, result_stream_args=premium_search_args)
    for tweet in tweets[0:80]:
        r = ' '.join(word for word in processor.preProcess(tweet.all_text))
        processedTweets.append(r)

    total = 0
    for i in range(0, 2):