Esempio n. 1
0
    def NegaMax(self, board, move, turn, depth, alpha, beta, tilesearchrange):
        """Alpha-beta search in negamax form over the open moves of ``board``.

        Args:
            board: Position to search.
            move: Move associated with ``board``; handed back unchanged as the
                second element of the result.
            turn: Stone type compared with ``self.EnemyStoneType`` to choose
                the stone placed at the next ply.
            depth: Remaining plies; the position is graded once it reaches 0.
            alpha: Lower bound of the search window.
            beta: Upper bound of the search window.
            tilesearchrange: Only forwarded to the recursive calls; move
                generation itself uses ``self.OpenSearchRange``.

        Returns:
            Tuple ``(score, move)``.
        """
        # Leaf: WinChecker reports a hit or the depth budget is used up.
        # NOTE(review): the leaf grade is always AI minus enemy and does not
        # depend on ``turn`` - confirm this matches the sign convention the
        # negated recursion below expects.
        if WinChecker(board).CheckBoth() or depth == 0:
            own_grade = Analyzer(board).Grader(self.AIStoneType)
            rival_grade = Analyzer(board).Grader(self.EnemyStoneType)
            return (own_grade - rival_grade, move)

        # Stone placed on every child board and passed down as its ``turn``.
        if turn == self.EnemyStoneType:
            next_stone = self.AIStoneType
        else:
            next_stone = self.EnemyStoneType

        best = -10000000
        for candidate in self.aiutils.GetOpenMovesPlus(board,
                                                       self.OpenSearchRange):
            child_board = self.aiutils.GenerateCustomGameBoard(
                board, candidate, next_stone)
            child_result = self.NegaMax(child_board, candidate, next_stone,
                                        depth - 1, -beta, -alpha,
                                        tilesearchrange)
            score = -child_result[0]
            best = max(best, score)
            alpha = max(alpha, score)
            if alpha >= beta:
                print("AB CUTOFF")
                break

        return (best, move)
Esempio n. 2
0
    def setUpClass(cls):
        """Build the shared fixtures: two analyzers and four trading records.

        Each record is priced with ``cls.analyzer.stock_price`` for its own
        ticker and date; ``cls.records`` groups the records by ticker.
        """
        cls.tickers = ['AAPL', 'MSFT']
        cls.analyzer = Analyzer(cls.tickers, start='2010-01-01')

        def record(ticker, shares, date):
            # The ticker is passed once, so the record's symbol and the price
            # lookup cannot drift apart (the MSFT short used to be labelled
            # 'AAPL' while carrying an MSFT price).
            return TradingRecord(ticker, shares, date,
                                 cls.analyzer.stock_price(ticker, date))

        cls.aapl_long_3 = record('AAPL', 3, '2010-01-05')
        cls.aapl_short_3 = record('AAPL', -3, '2010-01-15')
        cls.msft_long_3 = record('MSFT', 3, '2010-01-05')
        cls.msft_short_3 = record('MSFT', -3, '2010-01-15')
        cls.records = {'AAPL': [cls.aapl_long_3, cls.aapl_short_3],
                       'MSFT': [cls.msft_long_3, cls.msft_short_3]}

        cls.analyzer_momentum = Analyzer(cls.tickers, start='2010-01-01')
Esempio n. 3
0
def getPupil(image, params):
    """Return the pupil-centre candidate selected with the REFLECTION filter.

    Args:
        image: Image handed to ``Analyzer``.
        params: Parameters handed to ``Analyzer`` together with the image.

    Returns:
        Whatever ``getPupilCentreCandidate`` yields for the REFLECTION option.
    """
    eye_data = Analyzer(image, params).getEyeData()

    # The result is not used here; the call is kept in place because it may
    # have side effects the candidate lookup relies on - TODO confirm.
    eye_data.getReflection()

    return eye_data.getPupilCentreCandidate(
        db.Eyeball.Eyeball.FilterOptions.REFLECTION)
Esempio n. 4
0
 def ch1Graph(self, resultDict, graph, startDate, endDate):
     ''' Sets title and labels for choice one, and graphs values. Returns analysis string.

     resultDict is iterated as a sequence of per-stock dictionaries; each one is
     read through 'Time Series (Daily)' (date string -> price) and
     'Meta Data' -> '2. Symbol'. graph receives the title, labels, one line per
     stock, the legend and three x-axis ticks. startDate and endDate are only
     passed on to Analyzer.compareOne, whose result is returned.
     '''

     # Graph setup
     graph.set_title("Stock Price vs Time")      # Set graph title
     graph.set_xlabel("Date")                    # Set x-axis label
     graph.set_ylabel("Stock Price")             # Set y-axis label

     # Stays empty when resultDict has no stocks, so the tick code below can
     # be skipped instead of failing on an unbound or empty list.
     dateList = []

     # for each stock dictionary in the resultDict (list of dictionaries)
     for stock in resultDict:
         series = stock['Time Series (Daily)']
         # Sort the dates once and reuse that order for dates and prices
         dateList = sorted(series, key=lambda x: datetime.datetime.strptime(x, '%Y-%m-%d'))
         # Create list of prices for corresponding dates
         prices = [series[date] for date in dateList]
         # Plot the prices
         graph.plot(prices, label=stock['Meta Data']['2. Symbol'])

     # Set legend in the best location
     graph.legend(loc='best')

     # The ticks use the dates of the last stock plotted.
     if dateList:
         # One index for both the tick position and its label, so the middle
         # label always names the date drawn at that tick.
         middle = len(dateList) // 2
         last = len(dateList) - 1
         # Set 3 x-axis points
         graph.set_xticks([0, middle, last])
         # Set corresponding dates on x-axis
         graph.set_xticklabels([dateList[0], dateList[middle], dateList[last]])

     # Analysis
     a = Analyzer()
     # Get change and stock name from analysis of dictionary values
     analysis = a.compareOne(resultDict, startDate, endDate)

     # Return analysis string to GUI
     return analysis
Esempio n. 5
0
def process():
    """Read the patient records, run every alert check and print the alerts.

    Records come from ``Input_module.input()`` and are addressed by position
    (``data[0]``, ``data[1]``, ...). For each record an ``Analyzer`` is built
    from the two blood-pressure values, heart rate, blood oxygen and
    temperature; the six check results are passed to
    ``Output_Module.display_basic_iuput_data`` and its result is printed
    entry by entry.
    """
    data = Input_module.input()
    time.sleep(0.5)

    for idx, _ in enumerate(data):
        record = data[idx]
        systolic = record["Systolic_BP"]
        diastolic = record["Diastolic_BP"]
        heart_rate = record["heartrate"]
        oxygen = record["blood_oxygen"]
        temperature = record["temperature"]

        checker = Analyzer(systolic, diastolic, heart_rate, oxygen,
                           temperature)
        signal_loss = checker.Signal_Loss(heart_rate, temperature)
        shock_alert = checker.Shock_Alert(heart_rate, temperature)
        oxygen_supply = checker.Oxygen_Supply(oxygen)
        fever = checker.Fever(temperature)
        hypotension = checker.Hypotension(systolic, diastolic)
        hypertension = checker.Hypertension(systolic, diastolic)

        result = Output_Module.display_basic_iuput_data(
            signal_loss, shock_alert, oxygen_supply, fever, hypotension,
            hypertension)

        print('--------------------------------------')
        print('Patient  No', idx, 'Alert')
        for alert_name in result:
            print(alert_name, ':', result[alert_name])
Esempio n. 6
0
def main():
    """Run one RewriterMain command over every file below an APK directory.

    Reads ``sys.argv``: the command (``rewrite``, ``analyze``, ``glue`` or
    ``attach``), the APK directory and, for ``rewrite`` only, the function
    signature. The matching worker is reset, fed every file found by
    ``os.walk`` and finally asked to ``postprocess``.

    The usage text is printed and the function returns without doing any work
    when the command or directory is missing, when ``rewrite`` lacks its
    signature, or when the command is not recognised.
    """

    def usage():
        # Single-argument print calls behave the same on Python 2 and 3.
        # NOTE(review): "attach" is accepted below but not listed here.
        print("To inline hook some function   :   python RewriterMain.py rewrite ApkDir FunctionSignature")
        print("To perform static analysis     :   python RewriterMain.py analyze ApkDir")
        print("To glue APIHook to the package :   python RewriterMain.py glue ApkDir")

    args = sys.argv[1:]
    # Both the command and the APK directory are required.
    if len(args) < 2:
        usage()
        return

    Command = args[0]
    ApkDir = args[1]
    if Command == "rewrite":
        if len(args) < 3:
            usage()
            return
        worker = Rewriter()
        worker.reset(args[2])
    elif Command == "analyze":
        worker = Analyzer()
        worker.reset()
    elif Command == "glue":
        worker = Gluer()
        worker.reset(ApkDir)
    elif Command == "attach":
        worker = Attacher()
        worker.reset(ApkDir)
    else:
        # Unknown command: without this branch ``worker`` stayed unbound.
        usage()
        return

    for dirname, _, filenames in os.walk(ApkDir):
        for filename in filenames:
            worker.dofile(os.path.join(dirname, filename))

    worker.postprocess()
Esempio n. 7
0
def analyzeImageBB(recvPipe, sendPipe):
    """Receive images, locate the pupil centre and send the coordinates back.

    Args:
        recvPipe: Connection whose ``recv()`` yields the next image (or None).
        sendPipe: Connection whose ``send()`` receives an ``(x, y)`` tuple;
            ``(-1, -1)`` is sent when the received image is None.

    The loop runs for as long as the module-level ``ANALYZE_IMAGES`` is truthy.
    """
    loggerProcessor.info('Init Analyzing Loop')

    # The tree is loaded and the motor started before the loop, although the
    # loop below uses neither of them.
    dTree = DecisionTree()
    dTree.importTree(TREE_DIRECTORY)

    motor = Actuate.Actuate("P8_13", 3, -1)
    motor.startup()
    motor.actuate("FAR")

    while ANALYZE_IMAGES:
        img = recvPipe.recv()

        if img is None:
            loggerProcessor.error('Image was none')
            # The pipe object itself is not callable; the sentinel has to go
            # through send() like the regular result.
            sendPipe.send((-1, -1))
            continue

        (x, y) = Analyzer(img).getEyeData().getPupilCentreCandidate(
            db.Eyeball.FilterOptions.REFLECTION)
        loggerProcessor.info('Got x: {} y: {}'.format(x, y))
        sendPipe.send((x, y))
Esempio n. 8
0
 def test_date_before_start_keyerror_should_appear(self):
     """winners() for a date earlier than the analyzer start gives an empty list."""
     start_date = "2010-01-01"
     query_date = '2009-05-28'
     analyzer = Analyzer(list(gt.get_biggest_n_tickers(40)), start_date)

     # NOTE(review): the test name mentions a KeyError, but the check below
     # expects an empty list - confirm which behaviour is intended.
     winners = analyzer.winners(query_date, 25, 5)
     self.assertEqual(winners, [])
Esempio n. 9
0
def test_capture():
    """Check the move minimax picks in two mirrored capture positions.

    The same ``Analyzer`` instance is used for both positions, first with the
    board's default turn and then with ``turn = -1``.
    """
    print("testing simple capture and advance")

    print("\tsimple choice white")
    white_board = Board()
    white_board.board = [
        [0, 0, 0, 0, 0, 0, 0, -999],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 1, 0, 0, 0, 0, 0, 0],
        [-1, 0, -3, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 999],
    ]

    analyzer = Analyzer()
    analyzer.sd_limit = 2
    white_result = analyzer.minimax(white_board, 20)
    assert white_result.move == ((2, 1), (3, 2))

    print("\tsimple choice black")
    black_board = Board()
    black_board.turn = -1
    black_board.board = [
        [0, 0, 0, 0, 0, 0, 0, 999],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [1, 0, 3, 0, 0, 0, 0, 0],
        [0, -1, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, -999],
    ]

    # The analyzer from the first position is reused; only the limit is set again.
    analyzer.sd_limit = 2
    black_result = analyzer.minimax(black_board, 20)
    assert black_result.move == ((4, 1), (3, 2))
Esempio n. 10
0
 def ch2Graph(self, resultDict, graph):
     ''' Sets title and labels for choice two, plots through _placeValues and returns the analysis string.

     resultDict maps each index name to a list of (date, price) pairs. Every
     value is replaced in place by a list of (date, percent change) pairs
     before the dictionary is handed to _placeValues and Analyzer.compareTwo.
     '''

     # Graph setup: title and axis labels
     graph.set_title("Monthly Price Percent Change for Stock Indices NASDAQ, S&P 500, Dow Jones during last 12 Months")
     graph.set_xlabel("Date")
     graph.set_ylabel("Percentage Change in Closing Price")

     for name in resultDict:
         # Walk the stored pairs back to front, then split them into dates and prices
         dates, prices = zip(*reversed(resultDict[name]))
         closes = np.array(prices).astype(float)
         # Percent change between consecutive data points
         # NOTE(review): with a NumPy array on the right, `[0] + array` is an
         # element-wise addition (the array keeps its length), not a list
         # concatenation, so no leading 0 is added and the zip below stops one
         # date short - confirm this is intended.
         changes = [0] + ((np.diff(closes) / closes[:-1]) * 100)
         # Store dates and changes in the dictionary's original ordering
         resultDict[name] = list(zip(reversed(dates), reversed(changes)))

     # Graph dictionary values
     self._placeValues(resultDict, graph)

     # Analysis string from the rewritten dictionary, returned to the GUI
     return Analyzer().compareTwo(resultDict)
Esempio n. 11
0
    def generation_phase(self) -> None:
        """
        Generates molecules using a pre-trained model.

        Loads the weights saved for epoch `self.constants.generation_epoch`
        from `self.constants.job_dir`, puts the model in evaluation mode and
        generates `self.constants.n_samples` graphs with gradients disabled.
        """
        print("* Setting up generation job.", flush=True)
        self.load_training_set_properties()
        self.restart_epoch = self.constants.generation_epoch
        self.analyzer = Analyzer(valid_dataloader=None,
                                 train_dataloader=None,
                                 start_time=self.start_time)

        print(
            f"* Loading model from saved state (Epoch {self.restart_epoch}).",
            flush=True)
        model_path = self.constants.job_dir + f"model_restart_{self.restart_epoch}.pth"
        self.model = self.create_model()

        # Read the checkpoint from disk once; both formats are reduced to a
        # plain state dict before the weights are copied.
        checkpoint = torch.load(model_path)
        try:
            # models created using GraphINVENT v1.0 expose `state_dict()`
            state_dict = checkpoint.state_dict()
        except AttributeError:
            # models created using GraphINVENT v2.0 are the state dict itself
            state_dict = checkpoint

        # Copy the weights into the freshly created model. Assigning to
        # `self.model.state_dict` instead would only replace that method and
        # leave the model with its initial parameters.
        self.model.load_state_dict(state_dict)

        self.model.eval()
        with torch.no_grad():
            self.generate_graphs(n_samples=self.constants.n_samples)

        self.print_time_elapsed()
Esempio n. 12
0
    def training_phase(self) -> None:
        """
        Runs the training loop: one training pass, one validation pass, a
        status write and a model evaluation per epoch.
        """
        print("* Setting up training job.", flush=True)

        # data loaders for the two HDF files
        self.train_dataloader = self.get_dataloader(
            hdf_path=self.train_h5_path,
            data_description="training set",
        )
        self.valid_dataloader = self.get_dataloader(
            hdf_path=self.valid_h5_path,
            data_description="validation set",
        )

        self.load_training_set_properties()
        self.create_output_files()
        self.analyzer = Analyzer(
            valid_dataloader=self.valid_dataloader,
            train_dataloader=self.train_dataloader,
            start_time=self.start_time,
        )

        first_epoch, stop_epoch = self.define_model_and_optimizer()

        print("* Beginning training.", flush=True)
        for epoch in range(first_epoch, stop_epoch):
            self.current_epoch = epoch

            train_loss = self.train_epoch()
            valid_loss = self.validation_epoch()

            # learning rate as it stands after both passes of this epoch
            learning_rate = self.optimizer.param_groups[0]["lr"]
            util.write_model_status(epoch=self.current_epoch,
                                    lr=learning_rate,
                                    training_loss=train_loss,
                                    validation_loss=valid_loss)

            self.evaluate_model()

        self.print_time_elapsed()
Esempio n. 13
0
    def get_ts_properties(self, molecular_graphs: list,
                          group_size: int) -> None:
        """
        Updates `self.ts_properties` with the properties of one group of
        molecular graphs. Outside the training set the attribute is reset
        to None.

        Args:
        ----
            molecular_graphs (list) : Contains `PreprocessingGraph`s.
            group_size (int) : Size of "group" (i.e. slice of graphs).
        """
        if not self.is_training_set:
            self.ts_properties = None
            return

        analyzer = Analyzer()
        group_properties = analyzer.evaluate_training_set(
            preprocessing_graphs=molecular_graphs)

        if not self.ts_properties:
            # nothing stored yet: the first group's properties are kept as they are
            self.ts_properties = group_properties
            return

        # fold the current group into the properties accumulated so far
        self.ts_properties = analyzer.combine_ts_properties(
            prev_properties=self.ts_properties,
            next_properties=group_properties,
            weight_next=group_size)
Esempio n. 14
0
def handle_game_create(json, methods = ["GET", "POST"]):
    """Create a game from the request payload and announce the new game list.

    Nothing happens when the requested id is "lobby" or is already present in
    ``games``. Otherwise a ``Game`` is stored under that id and the encoded
    game list is emitted to the "lobby" room. ``methods`` is not used in the
    body.
    """
    gid = json["gid"]
    if gid == "lobby" or gid in games:
        return

    grid = Grid(json["width"], json["height"], True)

    language = json["language"]
    # Lexicon for the requested language, with the "english" entry as fallback.
    lexicon = lexicons.get(language.lower(), lexicons["english"])
    analyzer = Analyzer(lexicon, language)

    properties = GameProperties(min_letters = json["minLetters"],
                                minutes = json["minutes"])
    games[gid] = Game(gid, properties, grid, analyzer, send_game_update,
                      list_request_callback, send_analysis_callback)

    listing = {game_id: game.encode() for game_id, game in games.items()}
    socketio.emit("game_list_update", listing, room = "lobby")
def main():
    """Load patient records from the file "input" and print their alerts.

    Every non-blank line is split on whitespace; its first nine fields are
    passed to ``input_module`` and the resulting ``dic`` is stored and
    printed. Each stored record is then run through the ``Analyzer`` checks
    and the result of ``Output_Module.display_basic_iuput_data`` is printed.
    """
    # field order on a line:
    # user_id, age, gender, heartrate, Systolic_BP, Diastolic_BP, blood_oxygen, temperature, time
    data = {}
    # The with-block closes the file even if reading fails.
    with open("input") as source:
        LINES = source.read().splitlines()

    for line in LINES:
        items = line.split()
        if not items:
            # A blank line has no fields to index; skip it.
            continue
        Data = input_module(items[0], items[1], items[2], items[3], items[4],
                            items[5], items[6], items[7], items[8])
        # Keys stay contiguous (0, 1, 2, ...) even when lines were skipped.
        idx = len(data)
        data[idx] = Data.dic
        # Single-argument print calls behave the same on Python 2 and 3.
        print(data[idx])

    # Analyzer arguments: Systolic_BP, Diastolic_BP, Heart_Rate, Heart_Oxy_Level, Body_temp
    for idx in range(len(data)):
        record = data[idx]
        process = Analyzer(record["Systolic_BP"], record["Diastolic_BP"],
                           record["heartrate"], record["blood_oxygen"],
                           record["temperature"])
        signal_loss = process.Signal_Loss(record["heartrate"],
                                          record["temperature"])
        shock_alert = process.Shock_Alert(record["heartrate"],
                                          record["temperature"])
        oxygen_supply = process.Oxygen_Supply(record["blood_oxygen"])
        fever = process.Fever(record["temperature"])
        hypotension = process.Hypotension(record["Systolic_BP"],
                                          record["Diastolic_BP"])
        hypertension = process.Hypertension(record["Systolic_BP"],
                                            record["Diastolic_BP"])

        result = Output_Module.display_basic_iuput_data(
            signal_loss, shock_alert, oxygen_supply, fever, hypotension,
            hypertension)
        print('Patient {0} Alert'.format(idx))
        print(result)
Esempio n. 16
0
def process_thread():
    """Consume readings from ``input_queue`` until a None item arrives.

    Each reading is run through the six ``Analyzer`` checks; the result of
    ``receive_basic_iuput_data`` is put on ``output_queue`` after a
    one-second pause.
    """
    reading_keys = ("Systolic_BP", "Diastolic_BP", "Heart_Rate",
                    "Heart_O2_Level", "Body_temp")

    while True:
        print("analyzing")
        reading = input_queue.get()
        if reading is None:
            break

        # analyzer built from the five vitals, in constructor order
        checker = Analyzer(*[reading[key] for key in reading_keys])

        # every check reads its inputs back from the analyzer's attributes
        signal_loss = checker.Signal_Loss(checker.Heart_Rate,
                                          checker.Body_temp)
        shock_alert = checker.Shock_Alert(checker.Heart_Rate,
                                          checker.Body_temp)
        oxygen_supply = checker.Oxygen_Supply(checker.Heart_O2_Level)
        fever = checker.Fever(checker.Body_temp)
        hypotension = checker.Hypotension(checker.Systolic_BP,
                                          checker.Diastolic_BP)
        hypertension = checker.Hypertension(checker.Systolic_BP,
                                            checker.Diastolic_BP)

        # regular output based on the six alert values
        summary = receive_basic_iuput_data(signal_loss, shock_alert,
                                           oxygen_supply, fever,
                                           hypotension, hypertension)
        time.sleep(1)
        print("done")
        output_queue.put(summary)
Esempio n. 17
0
def analyze(args):
    """Echo the arguments, load results from the input directory and write the analysis.

    ``args`` must provide ``input_dir`` and ``output_file``.
    """
    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Inside analyze with args:")
    print('Input Directory: {0}'.format(args.input_dir))
    print('Output File: {0}'.format(args.output_file))

    lyzer = Analyzer()
    lyzer.loadResults(args.input_dir)
    lyzer.writeAnalysis(args.output_file)
Esempio n. 18
0
def main():
    """Obtain the API object from ``Configurator`` and run ``filterByTerm`` on it."""
    # Twitter API handle from the configurator
    api = Configurator().returnAPI()

    Analyzer(api).filterByTerm("created_at", "tweets.json")
Esempio n. 19
0
 def __init__(self, redis_client: redis.client.Redis):
     """Read the last reddit update time from redis and set up the analyzer.

     The value stored under 'redditUpdated' is decoded as UTF-8, printed and
     kept as a float; the twitter and coindesk markers start as False.
     """
     stored = redis_client.get('redditUpdated')
     # NOTE(review): presumably the key is always present - a None result
     # would fail on .decode(); verify against the writer of this key.
     timees = stored.decode('UTF-8')
     print(timees)

     self.date_updated_reddit = float(timees)
     self.date_updated_twitter = self.date_updated_coindesk = False
     self.redis_client = redis_client
     #self.preprocessor = Preprocessor(self.redis_client)
     self.analyzer = Analyzer(self.redis_client)
Esempio n. 20
0
    def __init__(self, fileName):
        """Open the source file and create the scanner, symbol-table stack and analyzer.

        Exits through ``sys.exit("Source file not found")`` when the file
        cannot be opened. The opened file is kept on ``self.sourceFile`` and
        handed to the scanner.
        """
        try:
            source = open(fileName, 'r')
        except IOError:
            sys.exit("Source file not found")

        self.sourceFile = source
        self.scanner = Scanner(source)
        self.symbolTableStack = SymbolTableStack()
        self.analyzer = Analyzer(fileName, self.symbolTableStack)
Esempio n. 21
0
    def __init__(self, parent=None):
        """Create the window state, build the UI, load settings and schedule the initial data load."""
        super(MainWindow, self).__init__(parent)

        # State first: no solution yet, fresh analyzer.
        self.currentSolution = None
        self.analyzer = Analyzer()

        self._buildUI()
        self._loadSettings()

        # _loadInitialData is handed to a zero-delay single-shot timer
        # rather than being called directly.
        QTimer.singleShot(0, self._loadInitialData)
Esempio n. 22
0
    def simulate(self, gamestate):
        """Play one heuristic-guided playout and report who won.

        The playout runs on the board returned by ``DuplicateBoard``. On each
        turn every open move is graded (mover's grade minus the other side's
        grade) and the highest-graded move is played.

        Returns:
            1 when ``winchecker.Check`` succeeds for the AI stone, -1 when it
            succeeds for the enemy stone, 0 when no open moves remain and
            neither check succeeded for the side to move.
        """
        # The stone counts decide who moves first: "black" on equal counts,
        # "white" when black has more stones.
        # NOTE(review): ``turn`` stays unassigned when there are fewer black
        # than white stones - confirm that state cannot occur.
        if len(gamestate.BlackStones) == len(gamestate.WhiteStones):
            turn = self.AIStoneType if self.AIStoneType == "black" else self.EnemyStoneType
        elif len(gamestate.BlackStones) > len(gamestate.WhiteStones):
            turn = self.AIStoneType if self.AIStoneType == "white" else self.EnemyStoneType
        board = super().DuplicateBoard(gamestate)
        winchecker = WinChecker(board)
        while True:

            # No open moves left: settle the result from the side to move.
            if not super().GetOpenMovesPlus(board):
                if winchecker.Check(turn) and turn == self.AIStoneType:
                    wins = 1
                    break
                elif winchecker.Check(turn) and turn == self.EnemyStoneType:
                    wins = -1
                    break
                else:
                    wins = 0
                    break
            # Grade every open move on a board generated with that move.
            hvalues = []
            for stone in super().GetOpenMovesPlus(board):
                edited_board = super().GenerateCustomGameBoard(
                    board, stone, turn)
                heuristicvalue = Analyzer(edited_board).Grader(
                    turn) + -Analyzer(edited_board).Grader(
                        self.EnemyStoneType if turn ==
                        self.AIStoneType else self.AIStoneType)
                hvalues.append((stone, heuristicvalue))
            # Highest grade first; the first entry is the move that is played.
            hvalues = sorted(hvalues, key=lambda x: x[1], reverse=True)
            final_choice = hvalues[0][0]
            print("Heuristics:", final_choice, turn, hvalues[0][1])
            #final_choice = random.choice(super().GetOpenMovesPlus(board))
            board.AddStone(
                turn, final_choice
            )  # change random moving to selecting best move according to heuristics
            # Stop as soon as the mover's check succeeds.
            if winchecker.Check(turn) and turn == self.AIStoneType:
                wins = 1
                break
            elif winchecker.Check(turn) and turn == self.EnemyStoneType:
                wins = -1
                break
            # Hand the move to the other side.
            turn = self.EnemyStoneType if turn == self.AIStoneType else self.AIStoneType
        print("simulation result", wins)
        return wins
Esempio n. 23
0
def analyzeImageBB(args):
    """Return the pupil-centre candidate for one image, or (-1, -1) without an image.

    Args:
        args: Indexable pair; element 0 is the image (may be None), element 1
            is the side label appended to the log message.
    """
    img = args[0]
    side = args[1]
    loggerProcessor.info('Analyzing:' + side)

    if img is None:
        return (-1, -1)

    eye_data = Analyzer(img).getEyeData()
    return eye_data.getPupilCentreCandidate(
        db.Eyeball.Eyeball.FilterOptions.REFLECTION)
Esempio n. 24
0
    def setUp(self):
        """Create a 5x5 test game with three players."""
        lexicon = PrefixTrie("lexicons/prefix_trie_test.txt")
        properties = GameProperties(min_letters=4, minutes=4)

        self.game = Game('test', properties, Grid(5, 5, True),
                         Analyzer(lexicon, "English"), game_update_callback,
                         list_request_callback, send_analysis_callback)

        # Players join in this fixed order.
        for player_name in ("T-block", "O-block", "I-block"):
            self.game.add_player(player_name)
Esempio n. 25
0
 def get(self):
     """Start an analysis for the 'mid' request argument.

     Returns a dict: a 'message' entry when the argument is missing or the
     analyzer reports an error, otherwise a 'status' entry after start().
     """
     mid = request.args.get('mid')
     if not mid:
         return {'message': 'cannot found mid'}

     an = Analyzer(mid)
     if an.error:
         return {'message': 'cannot found talk'}

     an.start()
     return {'status': 'success'}
Esempio n. 26
0
 def __initEa(self):
     """Set the signal switches and periods, then create the analyzer and money objects."""
     # mid 1) signal: control parameters
     self.InKLine = self.longAllowed = self.shortAllowed = True
     self.__shortPeriod, self.__longPeriod = 20, 40
     # mid 2) signal: control of the graphical output of computed indicators
     #self.toPlot = True
     self.analyzer = Analyzer(Globals=[])
     # mid 3) money: risk-strategy control
     self.money = moneySecond.moneySecond()
Esempio n. 27
0
def analyzeImageBB(pipe):
    """Receive left/right images, derive a prescription and drive the motor.

    Args:
        pipe: Connection whose ``recv()`` yields the left image and then the
            right image on each pass.

    For every pair with both images present, the pupil centres are located,
    the decision tree is traversed with the four coordinates, and the motor
    is actuated when the prescription differs from the current one. The loop
    runs while the module-level ``ANALYZE_IMAGES`` is truthy.
    """
    loggerProcessor.info('Init Analyzing Loop')

    dTree = DecisionTree()
    dTree.importTree(TREE_DIRECTORY)

    motor = Actuate.Actuate("P8_13", "P9_14", 3, -1)
    motor.startup()
    motor.actuate("FAR")

    currentPrescription = "FAR"

    while ANALYZE_IMAGES:
        leftImg = pipe.recv()
        rightImg = pipe.recv()

        if leftImg is None or rightImg is None:
            loggerProcessor.error('Image was none')
            continue

        (xL, yL) = Analyzer(leftImg).getEyeData().getPupilCentreCandidate(
            db.Eyeball.Eyeball.FilterOptions.REFLECTION)
        (xR, yR) = Analyzer(rightImg).getEyeData().getPupilCentreCandidate(
            db.Eyeball.Eyeball.FilterOptions.REFLECTION)
        loggerProcessor.info('Got x: {} y: {}'.format(xL, yL))
        loggerProcessor.info('Got x: {} y: {}'.format(xR, yR))

        # A -1 in any coordinate means no usable pupil for this pair.
        if any(v == -1 for v in (xL, yL, xR, yR)):
            continue

        # NOTE(review): each eye is passed as (y, x); an earlier variant used
        # (x, y) - confirm against the features the tree was built with.
        pupils = {'x1': yR, 'x2': xR, 'x3': yL, 'x4': xL}
        prescription = dTree.traverseTree(pupils, dTree.root)
        loggerProcessor.info('vergence computed: %s', prescription)

        # Compare by value: two equal strings need not be the same object,
        # so an identity test could re-actuate an unchanged prescription.
        if currentPrescription != prescription:
            motor.actuate(prescription)
            currentPrescription = prescription
Esempio n. 28
0
def get_highest_freq_words(doc, n=None, calc_n=True):
    """Return the leading word frequencies of one document as a dict.

    Args:
        doc: Object providing ``id`` and ``content``.
        n: Number of entries to keep; only used when ``calc_n`` is not True.
            None keeps every entry.
        calc_n: When exactly True, one twentieth of the number of frequency
            entries (rounded down) is kept and ``n`` is ignored.
    """
    preprocesser = Preprocesser()
    preprocesser.tokenize({doc.id: doc.content}.items(),
                          remove_stopwords=True)

    freqs = Analyzer().get_frequencies(preprocesser.corpus_tokenized, None)

    if calc_n is True:
        limit = len(freqs) // 20
    elif n is None:
        limit = len(freqs)
    else:
        limit = n
    return dict(freqs[:limit])
Esempio n. 29
0
def processCSV(name, path, hub):
    """Analyze one CSV file, add its counts to the global totals and save the result.

    Files listed in ``useless_csv`` are skipped. The analyzed frame is written
    to "./processed/<name>" when it has rows; otherwise the file is reported
    as useless. Any exception raised along the way is printed and not
    propagated.
    """
    global tot_data, tot_duplicates, tot_left, tot_nan, tot_owner
    global tot_before_equal_after, tot_comm_to_comm, tot_no_comment
    global tot_no_marked, tot_no_method_after, tot_no_method_before
    global tot_no_valid_ref, tot_triplets

    if name in useless_csv:
        return

    try:
        frame = pd.read_csv(filepath_or_buffer=path)

        analyzer = Analyzer(frame, hub)
        analyzer.remove_duplicates()
        analyzer.remove_owner_comments()
        analyzer.remove_nan_data()
        triplets = analyzer.analyze_data()

        # Running totals across all processed files.
        tot_data += len(frame)
        tot_duplicates += analyzer.duplicates
        tot_left += analyzer.left_side_cases
        tot_nan += analyzer.nan_data
        tot_owner += analyzer.owner_comments
        tot_before_equal_after += analyzer.before_equal_after
        tot_comm_to_comm += analyzer.comm_to_comm
        tot_no_comment += analyzer.no_comment
        tot_no_marked += analyzer.no_marked
        tot_no_method_after += analyzer.no_method_after
        tot_no_method_before += analyzer.no_method_before
        tot_no_valid_ref += analyzer.no_valid_ref
        tot_triplets += len(triplets)

        if len(triplets) == 0:
            print('+++++ USELESS CSV: ', name)
        else:
            triplets.to_csv("./processed/" + name)

    except Exception as e:
        print("----- CVS unreadable: ", name)
        print(e)
Esempio n. 30
0
def get_highest_freq_words(id, n=500, calc_m=True):
    """Return the word frequencies of a stored, tokenized corpus as a dict.

    Args:
        id: Corpus identifier; used for both the directory and the file name.
        n: Passed to ``Analyzer.get_frequencies`` as its second argument.
        calc_m: Not used by the current body.
    """
    serializer = Serializer()
    corpus_file = str(id) + "_tokens_stopwords-excluded_cs.corpus"
    corpus_tokenized = serializer.load(corpora_path + str(id) + "/" + corpus_file)

    return dict(Analyzer().get_frequencies(corpus_tokenized, n))