Ejemplo n.º 1
0
    def parse_urls(self, html):
        """
        Collects the not-yet-visited URLs linked from the given html.

        Two kinds of links are inspected: anchors nested inside
        ``h2.section-heading`` elements and ``a.story-link`` anchors.
        Every href that is not in ``self.visited_urls`` is passed through
        ``Utility.clean_url`` before it is returned.

        :type html: str
        :rtype:     list
        """

        document = BeautifulSoup(html, "html.parser")

        # section headings: (presumably) only present on the main page
        hrefs = [
            heading.a.get("href")
            for heading in document.findAll("h2", {"class": "section-heading"})
            if heading.a
        ]

        # story links: on the main page and listed as relevant articles
        hrefs.extend(
            anchor.get("href")
            for anchor in document.findAll("a", {"class": "story-link"}))

        return [
            Utility.clean_url(href) for href in hrefs
            if href not in self.visited_urls
        ]
Ejemplo n.º 2
0
def load_data():
    """Build the source and target training tensors from the raw dialog data.

    The dialog texts are loaded for ``GENRE`` (shuffled), the module-level
    tokenizer is fitted on both sides with ``MIN_TOKEN_FREQ``, the texts are
    converted to number sequences and filtered, and both sides are finally
    tensorized as ``torch.long`` on ``DEVICE``.

    Returns:
        tuple: ``(source_sequences, target_sequences)``.
    """
    loader = dl.DialogLoaderTransformer(
        data_directory=dl.DATA_DIRECTORY,
        delimiter=dl.DELIMITER,
        movie_titles_headers=dl.MOVIES_TITLE_HEADERS,
        movie_lines_headers=dl.MOVIE_LINES_HEADERS,
        movie_conversation_headers=dl.MOVE_CONVERSATION_SEQUENCE_HEADERS)

    # load + clean
    sources, targets = loader.get_training_data(genre=GENRE, shuffle=True)

    # the vocabulary is fitted on both sides of the dialog
    tokenizer.fit_on_text(sources + targets,
                          min_keep_frequency=MIN_TOKEN_FREQ)

    # text -> numbers
    source_numbers = tokenizer.convert_text_to_number(sources)
    target_numbers = tokenizer.convert_text_to_number(targets)
    source_numbers, target_numbers = tokenizer.filter(
        source_numbers=source_numbers,
        target_numbers=target_numbers,
        max_token_size=MAX_TOKEN_LENGTH,
        remove_unknown=True)

    # numbers -> tensors (source first, then target)
    tensor_options = {"dtype": torch.long, "device": DEVICE}
    return (Utility.tensorize(source_numbers, **tensor_options),
            Utility.tensorize(target_numbers, **tensor_options))
Ejemplo n.º 3
0
    def __init__(self):
        """Initialise the crawl state from ``settings`` and reset the cache."""
        # bookkeeping
        self.visited_urls = set()
        self.ignored = 0

        # values taken from the settings module
        self.remaining = settings.max_pages
        self.init_url = settings.init_url
        self.pattern = re.compile(settings.nyc_regex)

        Utility.reset_cache(settings.cache_directory)
Ejemplo n.º 4
0
 def setUp(self):
     """Create a catalog holding two tables with random ids and names."""
     random.seed(0)  # fixed seed, so the two ids are the same on every run
     self.id1 = random.randint(1, 10000)
     self.id2 = random.randint(1, 10000)
     self.name1 = str(uuid.uuid4())
     self.name2 = str(uuid.uuid4())

     self.catalog = Catalog()
     tables = ((self.id1, self.name1), (self.id2, self.name2))
     for table_id, table_name in tables:
         table_file = SkeletonFile(table_id, Utility.get_tupledesc(2))
         self.catalog.add_table(table_file, table_name)
Ejemplo n.º 5
0
    def __init__(self):
        """Record the names of the cached files and reset the output directory.

        When the cache directory does not exist, a message is printed and
        the file list is left empty.
        """
        self.nyc_regex = settings.nyc_regex

        try:
            cached_names = os.listdir(settings.cache_directory)
        except FileNotFoundError:
            print("Nothing to scrape")
            cached_names = []
        self.files = list(cached_names)

        Utility.reset_cache(settings.output_directory)
Ejemplo n.º 6
0
 def __init__(self):
     """Create the helper objects this instance works with.

     ``self.utility`` is now built once; it used to be assigned twice, which
     constructed a ``Utility`` only to discard it.
     """
     self.utility = Utility()
     self.pincode = PinCode()
     self.phone_number_lookup = PhoneNumberLookup()
     # the phone-number helper shares the lookup created above
     self.phone_number = PhoneNumber(self.phone_number_lookup)
     self.output_dir = "output_dir"
     self.ms_office = MsOffice()
     self.state_mapper = StateMapper()
     self.district_mapper = DistrictMapper()
Ejemplo n.º 7
0
    def __init__(self):
        """Initialise counters, the URL queue and the cache directory."""
        # counters and visited bookkeeping
        self.ignored = 0
        self.visited_pages = 0
        self.visited_urls = set()

        # limits and patterns from the settings module
        self.max_pages = settings.max_pages
        self.pattern = re.compile(settings.nyc_regex)

        # the queue starts with the initial URL only
        self.url_queue = deque([settings.init_url])

        Utility.reset_cache(settings.cache_directory)
Ejemplo n.º 8
0
    def test_combine(self):
        """Merging a 1-field and a 2-field TupleDesc gives three int fields."""
        first = Utility.get_tupledesc(1, "td1")
        second = Utility.get_tupledesc(2, "td2")
        merged = TupleDesc.merge(first, second)

        expected_fields = 3
        self.assertEqual(expected_fields, merged.num_fields())
        self.assertEqual(expected_fields * IntType.get_len(),
                         merged.get_size())
        for index in range(expected_fields):
            self.assertEqual(IntType, merged.get_field_type(index))

        names_match = self.combined_string_arrays(first, second, merged)
        self.assertEqual(names_match, True)
Ejemplo n.º 9
0
    def test_duplicate_ids(self):
        """After adding a file under ``self.id2``, lookups return the new entry."""
        replacement_name = str(uuid.uuid4())
        replacement_file = SkeletonFile(self.id2, Utility.get_tupledesc(2))
        self.catalog.add_table(replacement_file, replacement_name)

        # both lookups by id must now resolve to what was just added
        self.assertEqual(replacement_name,
                         self.catalog.get_table_name(self.id2))
        self.assertEqual(replacement_file,
                         self.catalog.get_database_file(self.id2))
Ejemplo n.º 10
0
 def __init__(self, phone_lookup):
     """Store the lookup and prepare the list of phone-number prefixes.

     The prefixes are de-duplicated, sorted by length and passed through
     ``Utility.reverse_list`` (presumably so the longest prefixes come
     first -- confirm against ``reverse_list``).
     """
     self.phone_lookup = phone_lookup
     self.utility = Utility()

     raw_prefixes = [
         " contact no ", " mobile, no ", " mobail no ", " mobile no ",
         " mobal nbr ", "mobail", "mobail no", " phone no ", " mobil no ",
         " cell no ", " cell ", " noumber ", " contact ", " mobile/",
         " mob no*", " wtsp ", " mobile ", " mb nbr ", " mob no ",
         " m no/*", " phone ", ",ph no ", " po no ", " mobil ", " ph no ",
         " m no_*", " m no/ ", " mob/*", " c no ", " phon ", " m no ",
         "phone ", " mob,", ",no *", " mob ", " mb ", " mob*", " no *",
         ",mo *", " pn ", " po ", " ph ", " nm ", " mo ", " m *",
         " number ", " nub ", " mob nub-", "no,", "phn num,", "phn num"
     ]
     shortest_first = sorted(set(raw_prefixes), key=len)
     self.phone_number_prefixes = self.utility.reverse_list(shortest_first)
Ejemplo n.º 11
0
    def generate_n_best_move(self, current_state, possible_moves, n=5):
        """Pick the n best single-destination moves for Optimized Hill Climbing.

        Every (from, to) pair is played on a copy of the state, scored with
        ``Utility.utility_function`` and undone again. The scores are sorted
        ascending when the current player is ``player_2`` and descending
        otherwise, and the first ``n`` entries are returned.

        Args:
            current_state (State): current state
            possible_moves (list(dict(from, to))): List possible moves of current player
            n (int, optional): Number of generated moves. Defaults to 5.

        Returns:
            list(dict(from, to)): one dict per selected move; ``to`` is a
            one-element list
        """
        scratch = current_state.deepcopy()
        scored = []
        for move_idx, move in enumerate(possible_moves):
            for to_idx, destination in enumerate(move['to']):
                # play the move, score the position, then take it back
                scratch.board.move_pawn(move['from'], destination)
                score = Utility.utility_function(scratch)
                scored.append((score, [move_idx, to_idx]))
                scratch.board.move_pawn(destination, move['from'])

        descending = not (current_state.currentPlayer
                          == current_state.player_2)
        scored.sort(key=lambda entry: entry[0], reverse=descending)

        best_moves = []
        for _, (move_idx, to_idx) in scored[:n]:
            chosen = possible_moves[move_idx]
            best_moves.append({
                'from': chosen['from'],
                'to': [chosen['to'][to_idx]]
            })
        return best_moves
Ejemplo n.º 12
0
    def local_search(self, current_state, possible_moves, algorithm="SA"):
        """Local search using Simulated Annealing Algorithm or Optimized Hill-Climbing

        Args:
            current_state (State): Current state
            possible_moves (list(dict(from, to))): List possible moves of current player
            algorithm (str, optional): "SA" runs Simulated Annealing; any
                other value delegates to ``generate_n_best_move``.
                Defaults to "SA".

        Returns:
            (list(dict(from, to))): new possible_moves with less possible moves
        """
        if algorithm != "SA":
            return self.generate_n_best_move(current_state, possible_moves)

        # Local import: only the annealing branch needs a random source.
        from random import random

        # Budget: 1/5 of the time limit of each depth (assuming the time is
        # allocated uniformly per depth, and 4/5 of it is needed to
        # traverse the tree).
        deadline = time() + self.t_limit / (self.max_depth * 5)
        current_value = Utility.utility_function(current_state)
        generated_moves = []
        while True:
            # The remaining time budget is used as the annealing temperature.
            temperature = deadline - time()
            if temperature <= 0 or not possible_moves:
                return generated_moves
            next_move, possible_moves = self.generate_random_move(
                possible_moves)
            if not next_move:
                continue
            delta_e = self.generate_delta_e(next_move, current_state,
                                            current_value)
            # Improving moves are always kept. A non-improving move is kept
            # with probability exp(delta_e / temperature); the previous
            # truthiness test on exp(...) accepted it almost unconditionally.
            if delta_e > 0 or random() < exp(delta_e / temperature):
                generated_moves.append(next_move)
Ejemplo n.º 13
0
    def minimax(self,
                state,
                is_max,
                depth=0,
                alpha=float("-inf"),
                beta=float("inf"),
                algorithm="optimized"):
        """Minimax + Local Search Algorithm for solving Halma Checker

        Parameters:
            state (State): game state
            is_max (Boolean): is maxing the objective value
            depth (int): current search depth
            alpha (float): alpha bound handed to ``self.search``
            beta (float): beta bound handed to ``self.search``
            algorithm (str): forwarded to ``self.local_search``

        Returns:
            Tuple: ``(None, utility value)`` when the search terminates at
            this node, otherwise whatever ``self.search`` returns
        """
        # Terminal node: there is no move to report, only the evaluation
        if self.terminate(depth, state):
            return None, Utility.utility_function(state)

        candidate_moves = state.current_player_possible_moves()

        # On the bot player's turn at depth 0, run the local search so that
        # only some of the possible moves are explored; in every other case
        # all moves are considered.
        bot_at_root = self.which_player == state.currentPlayer and depth == 0
        if bot_at_root:
            candidate_moves = self.local_search(state, candidate_moves,
                                                algorithm)

        # minimax search over the (possibly reduced) move list
        return self.search(is_max, candidate_moves, state, depth, alpha, beta)
Ejemplo n.º 14
0
Archivo: io.py Proyecto: 80xer/irewe
 def print_df(self, filename, df):
     """Write the rows of ``df`` as CSV to ``<filename>.txt`` and return the path.

     The elapsed write time is reported through ``Utility.printKeyValue``.
     If anything raises, the exception type and args are printed and
     ``None`` is returned implicitly.
     """
     try:
         util = Utility()
         filewritetime = datetime.datetime.now()
         # NOTE(review): a path under self.output_path used to be computed
         # here and was overwritten on the very next line, so the file has
         # always been written relative to the working directory. Confirm
         # which location is intended.
         filepath = "%s.txt" % (filename)
         # 'wb' is the mode the Python 2 csv module asks for. The with-block
         # closes the file even when writerows() raises.
         with open(filepath, 'wb') as f:
             csv.writer(f).writerows(df)
         util.printKeyValue('    write file Time diff',
                            datetime.datetime.now() - filewritetime, ' ',
                            True, True)
         return filepath
     except Exception as inst:
         # Best-effort reporting only; the error is not re-raised.
         print(type(inst))
         print(inst.args)
Ejemplo n.º 15
0
Archivo: io.py Proyecto: 80xer/irewe
 def print_df(self, filename, df):
     """Write the rows of ``df`` as CSV to ``<filename>.txt`` and return the path.

     The elapsed write time is reported through ``Utility.printKeyValue``.
     If anything raises, the exception type and args are printed and
     ``None`` is returned implicitly.
     """
     try:
         util = Utility()
         filewritetime = datetime.datetime.now()
         # NOTE(review): a path under self.output_path used to be computed
         # here and was overwritten on the very next line, so the file has
         # always been written relative to the working directory. Confirm
         # which location is intended.
         filepath = "%s.txt" % (filename)
         # 'wb' is the mode the Python 2 csv module asks for. The with-block
         # closes the file even when writerows() raises.
         with open(filepath, 'wb') as f:
             csv.writer(f).writerows(df)
         util.printKeyValue(
             '    write file Time diff',
             datetime.datetime.now() - filewritetime, ' ', True, True)
         return filepath
     except Exception as inst:
         # Best-effort reporting only; the error is not re-raised.
         print(type(inst))
         print(inst.args)
Ejemplo n.º 16
0
    def __init__(self, validation, verbose, json_string, time) -> None:
        """Store the given values, parse ``json_string`` and call ``self.print()``."""
        super().__init__()

        # plain values handed in by the caller
        self.__validation = validation
        self.__verbose = verbose
        self.__time = time

        # the JSON text is parsed once, up front
        parsed = Utility.load_json(json_string)
        self.__json = parsed

        self.print()
Ejemplo n.º 17
0
 def test_split_train_test_splits_based_on_fraction(self):
     """Checks that split_train_test splits both sequences by the fraction."""
     sources = list(range(1, 10))
     targets = [11 * value for value in sources]

     split = Utility.split_train_test(sources, targets, test_fraction=0.4)
     train_source, train_target, test_source, test_target = split

     # with 9 items and a 0.4 test fraction, the first 3 items are the test part
     self.assertEqual(train_source, [4, 5, 6, 7, 8, 9])
     self.assertEqual(train_target, [44, 55, 66, 77, 88, 99])
     self.assertEqual(test_source, [1, 2, 3])
     self.assertEqual(test_target, [11, 22, 33])
Ejemplo n.º 18
0
    def __init__(self, schema, json_string) -> None:
        """Store the schema, parse ``json_string`` and validate immediately."""
        super().__init__()

        self.__schema = schema
        parsed = Utility.load_json(json_string)
        self.__json = parsed

        # initial validation state, set before validate_json() is called
        self.__is_valid = True
        self.__message = None

        self.validate_json()
Ejemplo n.º 19
0
    def parse_urls(self, html):
        """
        Appends the new URLs found in the given html to the URL queue.

        A URL is queued (after ``Utility.clean_url``) when its raw href is
        not in ``self.visited_urls``.

        :type html: str
        """

        def enqueue_if_new(href):
            if href not in self.visited_urls:
                self.url_queue.append(Utility.clean_url(href))

        document = BeautifulSoup(html, "html.parser")

        # section headings: (presumably) only present on the main page
        for heading in document.findAll("h2", {"class": "section-heading"}):
            if heading.a:
                enqueue_if_new(heading.a.get("href"))

        # story links: on the main page and listed as relevant articles
        for anchor in document.findAll("a", {"class": "story-link"}):
            enqueue_if_new(anchor.get("href"))
Ejemplo n.º 20
0
    def test_combine(self):
        """Fields written with set_field are read back, also after overwriting."""
        tup = Tuple(Utility.get_tupledesc(2))

        # first the initial values, then an overwrite of both fields
        for first_value, second_value in ((-1, 0), (1, 37)):
            tup.set_field(0, IntField(first_value))
            tup.set_field(1, IntField(second_value))

            self.assertEqual(IntField(first_value), tup.get_field(0))
            self.assertEqual(IntField(second_value), tup.get_field(1))
Ejemplo n.º 21
0
    def generate_delta_e(self, next_move, current_state, current_value):
        """Compute deltaE of a move for the Simulated Annealing Algorithm

        The move is played on ``current_state`` itself, the resulting state
        is scored with ``Utility.utility_function`` and the move is then
        played backwards to restore the board.

        Args:
            next_move (dict(from, to)): next possible move; only the first
                entry of ``to`` is used
            current_state (State): current state
            current_value (float): current state value

        Returns:
            float: value after the move minus ``current_value``
        """
        origin = next_move['from']
        destination = next_move['to'][0]

        current_state.board.move_pawn(origin, destination)
        value_after_move = Utility.utility_function(current_state)
        current_state.board.move_pawn(destination, origin)  # undo

        return value_after_move - current_value
Ejemplo n.º 22
0
    def predict(cls, text):
        """Predict a response for a list of raw texts using Greedy Search.

           Example:
               text=['Who are you?']

           Returns a one-element list holding an apology when the tokenized
           input contains unrecognized words; otherwise the first response
           produced by ``Utility.predict``.
        """
        assert isinstance(text, list)

        token_numbers = tokenizer.convert_text_to_number(text)
        if cls._has_unrecognized_words(token_numbers):
            return ["Sorry, there is a word that I don't understand:\n"]

        # the second element of the result is not needed here
        responses, _ = Utility.predict(
            source_texts=text,
            model=encoder_decoder,
            tokenizer=tokenizer,
            device=DEVICE,
            max_prediction_len=MAX_PRED_LENGTH)
        return responses[0]
Ejemplo n.º 23
0
    def minimax(self,
                state,
                is_max,
                depth=0,
                alpha=float("-inf"),
                beta=float("inf")):
        """Minimax Algorithm for solving Halma Checker

        Parameters:
            state (State): game state
            is_max (Boolean): is maxing the objective value
            depth (int): current search depth
            alpha (float): alpha bound handed to ``self.search``
            beta (float): beta bound handed to ``self.search``

        Returns:
            Tuple: ``(None, utility value)`` when the search terminates at
            this node, otherwise whatever ``self.search`` returns
        """
        # Terminal node: there is no move to report, only the evaluation
        if self.terminate(depth, state):
            return None, Utility.utility_function(state)

        # Otherwise search over every move of the player to move
        candidate_moves = state.current_player_possible_moves()
        return self.search(is_max, candidate_moves, state, depth, alpha, beta)
Ejemplo n.º 24
0
    def scrape(file_name, html):
        """
        Scrapes the given HTML and stores the article as a JSON file.

        Author, date, title and the story paragraphs are extracted and
        written to ``settings.output_directory`` under ``file_name`` with
        ``.html`` replaced by ``.json``. When an ``AttributeError`` occurs
        during extraction (e.g. an expected element is missing), the article
        is skipped silently and nothing is written.

        :type file_name: str
        :type html:      str
        """

        strainer = SoupStrainer(["span", "h1", "p"])
        soup = BeautifulSoup(html, "html.parser", parse_only=strainer)

        try:
            author = soup.find("span", {
                "class": "byline-author"
            }).getText().title()
            date = Utility.clean_date(
                soup.find("time", {
                    "class": "dateline"
                }).get("datetime"))
            title = soup.find("h1", {"id": "headline"}).getText()

            # article body, one entry per paragraph
            content = [
                para.getText()
                for para in soup.findAll("p", {"class": "story-content"})
            ]

            # Rename only the file name, so a ".html" inside the directory
            # path is left alone.
            output_path = os.path.join(settings.output_directory,
                                       file_name.replace(".html", ".json"))

            # ensure_ascii=False emits non-ASCII characters verbatim, so the
            # file is opened as UTF-8 instead of the platform default.
            with open(output_path, "w", encoding="utf-8") as file:
                article = {
                    "author": author,
                    "content": content,
                    "date": date,
                    "title": title,
                }

                json.dump(article, file, indent=4, ensure_ascii=False)

        except AttributeError:
            # deliberate best-effort: pages lacking the expected elements
            # are skipped
            pass
0
 def __init__(self, db, const):
     """Keep the database helper and the constants object; create a Utility."""
     self.db, self.CONST = db, const
     self.utility = Utility()
Ejemplo n.º 26
0
Archivo: run.py Proyecto: 80xer/irewe
# Log a separator line: an empty string centred in 60 columns with '*' fill.
logger.info('%s' % ('{:*^60}'.format('')))


# db config
# The connection settings come from the [db-config] section of the file
# named "config" (a relative path).
configParser = ConfigParser.RawConfigParser()
configParser.read(r'config')
config = {
    'user': configParser.get('db-config', 'user'),
    'password': configParser.get('db-config', 'password'),
    'host': configParser.get('db-config', 'host'),
    'database': configParser.get('db-config', 'database'),
}

# Database helper, query object and the debug-aware utility (which logs
# through the same logger).
dbs = db.DbHelper(config)
qr = db.queries(dbs, const)
util = Utility(opts.debug)
util.setLogger(logger)

# With no DV given and loop enabled, every DV returned by getDvs for the
# user is processed; otherwise only the single (seq, dv) pair from the
# options.
if opts.dv is None and opts.loop is True:
    dvs = qr.getDvs(opts.userId)
else:
    dvs = ((opts.seq, opts.dv),)

# Counter of processed DVs.
cnt = 0
try:
    for dv in dvs:
        cnt = cnt + 1
        seq = dv[0]
        dv = dv[1]
        params = ()
        params = qr.getSetup(opts.userId, seq, dv)
Ejemplo n.º 27
0
# 1. Access to facilities
#     * Number of public basic health centers / 1000 inhabitants
#     * Number of beds / 1000 inhabitants
#
# 1. Access to medicine
#     * Number of pharmacies / 1000 inhabitants

# %% [markdown] {"toc-hr-collapsed": false}
# ## Access to professionals

# %% [markdown]
# ### Number of doctors / 1000 inhabitants

# %%
# Utility built from the "Number of doctors" column, then assessed and fitted.
# NOTE(review): what assess() and fit() compute is defined by Utility, not
# visible here -- confirm there.
u_ndocs = Utility(name="ndocs",
                  optimal_fit=True,
                  data=data["Number of doctors"].values)
u_ndocs.assess()
u_ndocs.fit()

# %%
# Show the available column names of the data.
data.columns

# %%
# Show the fitted parameter values as a dict.
u_ndocs.result.params.valuesdict()

# %% [markdown]
# ### Number of paramedical staff / 1000 inhabitants

# %%
u_nparam = Utility(name="nparam",
Ejemplo n.º 28
0
 def __init__(self):
     """Prepare the list of textual pin-code prefixes.

     The prefixes are de-duplicated, sorted by length and passed through
     ``Utility.reverse_list`` (presumably so the longest prefixes come
     first -- confirm against ``reverse_list``).
     """
     self.utility = Utility()
     raw_prefixes = [
         " pin ", " pin/ ", " pin_ ", " cod ", "p/c ", " pinn cod ",
         " code ", " cd ", " pincode ", " cod ", "pin ", "pin,",
         " pinkod ", "pin-", " pino "
     ]
     shortest_first = sorted(set(raw_prefixes), key=len)
     self.pin_number_prefixes = self.utility.reverse_list(shortest_first)
Ejemplo n.º 29
0
class PinCode:
    """Finds, highlights and normalises 6-digit pin codes in address text."""

    def __init__(self):
        """Prepare the list of textual pin-code prefixes.

        The prefixes are de-duplicated, sorted by length and passed through
        ``Utility.reverse_list`` (presumably so the longest prefixes come
        first -- confirm against ``reverse_list``).
        """
        self.utility = Utility()
        pin_number_prefixes = [" pin ", " pin/ ", " pin_ ", " cod ", "p/c ", " pinn cod ", " code ", " cd ",
                               " pincode ", " cod ", "pin ", "pin,", " pinkod ", "pin-", " pino "]
        self.pin_number_prefixes = self.utility.reverse_list(sorted(list(set(pin_number_prefixes)), key=len))

    @staticmethod
    def _unique_in_order(items):
        """Return the distinct items of ``items`` in first-seen order."""
        seen = set()
        unique = []
        for item in items:
            if item not in seen:
                seen.add(item)
                unique.append(item)
        return unique

    @staticmethod
    def _pad_inner(match, pad_word):
        """Pad what lies between the first and last character of ``match``."""
        return " " + pad_word + match[1:-1] + pad_word + " "

    @staticmethod
    def _pad_squashed(match, pad_word):
        """Pad ``match`` with all of its spaces removed."""
        return " " + pad_word + match.replace(" ", "") + pad_word + " "

    @staticmethod
    def _pad_keep_edges(match, pad_word):
        """Pad the inner digits and keep the first and last character."""
        return match[0] + " " + pad_word + match[1:-1] + pad_word + " " + match[-1]

    @staticmethod
    def _pad_keep_prefix(match, pad_word):
        """Pad the digits after the first character and keep that character."""
        # The prefix character must stay outside the padded digits.
        return match[0] + " " + pad_word + match[1:].replace(" ", "") + pad_word + " "

    def update_pin_number(self, address_obj):
        """Move the first highlighted pin code of the address into ``address_obj``.

        When the address contains a ``*dddddd*`` pin, ``address_obj.pin`` is
        set, the highlighted pin is removed from the address and
        " Pin <pin>" is appended to the whitespace-cleaned address. When the
        location mapper knows the pin, state, district and block are set as
        well.

        Raises:
            ValueError: if the location string does not consist of exactly
                three comma-separated parts.
        """
        location_mapper = PinLocationMapper()

        hilighted_pin_list = self.get_pin_code_hilighted(address_obj)
        if not hilighted_pin_list:
            return

        hilighted_pin = hilighted_pin_list[0]
        pin = hilighted_pin.replace("*", "")
        address_obj.pin = pin
        address_obj.address = address_obj.address.replace(hilighted_pin, "").strip()

        pin_location = location_mapper.get_address_details(pin)
        if pin_location:
            # presumably "state,district,block" -- verify against the mapper
            state, district, block = pin_location.split(",")
            address_obj.state = state
            address_obj.district = district
            address_obj.block = block

        address_obj.address = self.utility.white_space_cleaner(address_obj.address) + " Pin " + pin
        return

    def pad_pin_code(self, text_input, pad_word):
        """Surround the pin code(s) found in ``text_input`` with ``pad_word``.

        The rules below are tried in order and only the first rule that
        matches is applied. Rules flagged ``first_only`` pad a single match,
        the first one in text order; the others pad every distinct match.
        The input is returned unchanged when no rule matches.

        Three further rules that used to follow these could never fire and
        were dropped: one repeated the "| 3343 33 |" pattern, and the other
        two only matched text the "|n334333d|" rule had already matched.
        """
        rules = (
            # (pattern, builder, first_only)
            (r"[<]\d{6}[>]", self._pad_inner, True),  # |<334333>|
            (r"[ ]\d{6}$", self._pad_squashed, True),  # | 334333|
            (r"[ ]\d{6}[ ]", self._pad_squashed, True),  # | 334333 |
            (r"[^0-9*]\d{6}[^0-9*]", self._pad_keep_edges, False),  # |n334333d|
            (r"[ ]\d{4}[ ]\d{2}[ ]", self._pad_squashed, False),  # | 3343 33 |
            (r"[^0-9]\d{3}[ ]\d{3}", self._pad_keep_prefix, False),  # |n334 333|
            (r"[ ]\d{4}[ ]\d{2}$", self._pad_squashed, False),  # | 3343 33|
        )

        for pattern, build, first_only in rules:
            # First-seen order keeps the result independent of set ordering.
            matches = self._unique_in_order(re.findall(pattern, text_input))
            if not matches:
                continue
            if first_only:
                matches = matches[:1]
            text = text_input
            for match in matches:
                text = text.replace(match, build(match, pad_word))
            return text
        return text_input

    def pin_number_text_remover(self, text):
        """Lower-case ``text`` and replace the known pin prefixes.

        Returns ``None`` when ``text`` is ``None``.
        """
        if text is None:
            return None

        address = text.lower()
        for prefix in self.pin_number_prefixes:
            if prefix in address and "," in prefix and "*" in prefix:
                address = address.replace(prefix, ", *")
            if prefix in address and "*" in prefix:
                address = address.replace(prefix, " *")
            if prefix in address:
                address = address.replace(prefix, " ")
        return address

    def get_pin_code_hilighted(self, address_obj):
        """Return the distinct ``*dddddd*`` pins of the address, in text order."""
        highlighted_pin_code_regex = r"[*]\d{6}[*]"
        pin_codes = re.findall(highlighted_pin_code_regex, address_obj.address)
        return self._unique_in_order(pin_codes)

    def pin_code_extender(self, text):
        """Expand a 2-digit Bangalore suffix into a 6-digit pin code.

        For example ``"Bangalore-56,"`` becomes ``"Bangalore 560056 "``:
        non-alphanumeric characters of the matched fragment are turned into
        spaces and the two digits are prefixed with "5600".
        """
        # Bangalore
        regex_1 = r"[bB]angalore[ -]\d{2}[ ,]"
        for match in re.findall(regex_1, text):
            result = re.sub(r"[^a-zA-Z0-9]", " ", match)
            for short_pin in re.findall(r"\d{2}", result):
                pin = "5600" + short_pin
                result = result.replace(short_pin, pin)
            text = text.replace(match, result)
        return text
Ejemplo n.º 30
0
 def test_get_tupledesc(self):
     """A Tuple exposes the TupleDesc it was created with."""
     descriptor = Utility.get_tupledesc(5)
     self.assertEqual(descriptor, Tuple(descriptor).tuple_desc)
Ejemplo n.º 31
0
def get_options():
    """Parse the command-line options, validate them and print a summary.

    The process exits through ``sys.exit()`` when neither ``fix`` nor a
    ``userId`` is given, or when the ``dv`` / ``loop`` combination is
    rejected (see the note in the body).

    Returns:
        The parsed options object (``userId`` is forced to ``'system'``
        when ``fix`` is set).
    """
    parser = set_options()
    options, _args = parser.parse_args()

    util = Utility()

    # Strings are compared with '==': 'is' tests object identity, which
    # only works for string literals by accident of interning.
    if options.fix is False and options.userId == '':
        print('insert fix or userId options')
        print(u'fix 또는 id 옵션을 설정하세요.')
        sys.exit()

    if options.fix:
        options.userId = 'system'

    # NOTE(review): the parentheses spell out how Python has always
    # evaluated this condition ('and' binds tighter than 'or'). The intent
    # may have been "(dv empty or None) and (loop False or None)" --
    # confirm before changing it.
    if (options.dv == ''
            or (options.dv is None and options.loop is False)
            or options.loop is None):
        util.printKeyValue('Insert DV code', '')
        sys.exit()

    # print options
    util.printLine()
    util.printKeyValue('debug', options.debug, open=False)
    util.printKeyValue('fix', options.fix, open=False)
    util.printKeyValue('userId', options.userId, open=False)
    util.printKeyValue('seq', options.seq, open=False)
    util.printKeyValue('dv', options.dv, open=False)
    util.printKeyValue('loop', options.loop, open=False)
    util.printKeyValue('shift', options.shift, open=False)
    util.printLine()

    return options
Ejemplo n.º 32
0
Archivo: read.py Proyecto: 80xer/irewe
class ReadModule():
    """Reads time series from Excel workbooks.

    ReadModule only reads the data between t0 and t1.
    """

    def __init__(self, params):
        """Keep ``params`` together with its 't0' and 't1' entries."""
        self.t0 = params['t0']
        self.t1 = params['t1']
        self.utility = Utility()
        self.params = params

    def read_file(self, path):
        """Read every sheet of the workbook at ``path``.

        Returns:
            list: the series extracted from all sheets, in sheet order.
        """
        print('')
        print(u"%s 파일 읽기 시작" % (path))
        workbook = xlrd.open_workbook(path)
        result = []
        for sh in workbook.sheets():
            result.extend(self.extract_from_sheet(workbook, sh))
        print(u"%s 파일 읽기 완료" % (path))
        return result

    def extract_from_sheet(self, book, sh, date_col=0, id_row=2, nm_row=3, unit_row=4, start_col=1, start_row=5):
        """Extract one Series per data column of the sheet ``sh``.

        Dates are read from column ``date_col`` starting at ``start_row``.
        For each column from ``start_col`` on, a Series is built from the
        io type (the row above ``id_row``), code, name, unit and values.
        Only series whose dates span the whole [t0, t1] range are returned.
        """
        series_result = []
        date_values = sh.col_values(date_col, start_rowx=start_row, end_rowx=sh.nrows)  # date values
        date_types = sh.col_types(date_col, start_rowx=start_row, end_rowx=sh.nrows)    # date cell types

        date_result = []
        for value, cell_type in zip(date_values, date_types):
            if cell_type == 3:  # xlrd date cell
                date_tuple = xlrd.xldate_as_tuple(value, book.datemode)
                date_result.append(datetime.date(date_tuple[0], date_tuple[1], date_tuple[2]))
            elif cell_type == 2:  # numeric cell, parsed as YYYYMMDD
                date_str = str(int(value))
                date_result.append(datetime.datetime.strptime(date_str, '%Y%m%d').date())
            # NOTE(review): other cell types (e.g. text) are skipped, so
            # date_result can end up shorter than the value columns; string
            # dates probably need handling as well.

        col_cnt = sh.row_len(id_row)

        for i in range(col_cnt)[start_col:]:
            series = Series(self.params)
            series.io_type = sh.cell(id_row - 1, i).value
            series.code = self.utility.convert_code(sh.cell(id_row, i).value)
            series.name = sh.cell(nm_row, i).value
            series.group = sh.cell(unit_row, i).value
            series.value = sh.col_values(i, start_row)
            series.date = date_result
            series.data_cleansing(self.t0, self.t1)
            series.set_freq()

            # Only series with full data are added to the sheet list.
            if series.date[0] <= self.t0 and series.date[-1] >= self.t1:
                series_result.append(series)

        return series_result

    @staticmethod
    def column_len(sheet, index):
        """Return the length of column ``index`` without trailing empty cells.

        Declared static because it takes no ``self``; without the decorator
        a call on an instance would pass the instance as ``sheet``.
        """
        col_values = sheet.col_values(index)
        col_len = len(col_values)
        # count the falsy cells at the end of the column
        for _ in takewhile(lambda x: not x, reversed(col_values)):
            col_len -= 1
        return col_len
Ejemplo n.º 33
0
Archivo: db.py Proyecto: 80xer/irewe
    def insert_report(self, data):
        """Run all report insert steps in order and print each step's duration."""
        util = Utility()
        now = datetime.datetime.now

        util.printKeyValue('in output', '', open=True)

        # the first step reports with explicit extra printKeyValue arguments
        started = now()
        self.insert_iv(data)
        util.printKeyValue('    iv Time diff', now() - started, ' ', True,
                           True)

        # the remaining steps all report with the default arguments
        remaining_steps = (
            ('    factor Time diff', lambda: self.insert_factor(data)),
            ('    factor_weight Time diff',
             lambda: self.insert_factor_weight(data)),
            ('    factor parent Time diff',
             lambda: self.insert_factor_parent()),
            ('    index Time diff',
             lambda: self.insert_warning_board_idx(data)),
        )
        for label, run_step in remaining_steps:
            started = now()
            run_step()
            util.printKeyValue(label, now() - started)
Ejemplo n.º 34
0
class queries():
    """Thin query layer: runs SQL through ``db`` and shapes the results.

    ``db`` must expose ``exeData(sql)`` returning a sequence of rows and
    ``const`` must expose the ``QR_SELECT_*`` format strings used below.

    NOTE(security): every statement here is assembled by string
    interpolation/concatenation.  That is only safe while ``id``, ``seq``,
    ``dvcd`` and the item codes come from trusted sources; switch to
    parameterized queries if ``db`` supports them.
    """

    def __init__(self, db, const):
        """Store the database handle, a ``Utility`` helper and the constants."""
        self.db = db
        self.utility = Utility()
        self.CONST = const

    def getDvs(self, id):
        """Return the raw rows of ``QR_SELECT_ALL_DV`` formatted with ``id``."""
        return self.db.exeData(self.CONST.QR_SELECT_ALL_DV % id)

    def getSetup(self, id, seq, dvcd):
        """Load one setup row and return it as a parameter dictionary.

        The dictionary is also stored on ``self.params``, which
        ``extract_from_list`` reads afterwards.

        :param id: value interpolated second into ``QR_SELECT_DV_SETUP``.
        :param seq: value interpolated third into ``QR_SELECT_DV_SETUP``.
        :param dvcd: value interpolated first into ``QR_SELECT_DV_SETUP``.
        :return: dict with keys ``id_nm``, ``seq``, ``nts_thres``, ``t0``,
            ``t1``, ``t2``, ``pca_thres``, ``intv``, ``lag_cut``,
            ``scaling``, ``hp_filter``, ``dv``, ``dv_dir``, ``thres_cut``,
            ``dv_thres`` and ``shift``.
        :raises IndexError: when the query returns no row.
        """
        # Column positions inside the setup row.
        (ID_NM, SEQ, DV, START_DT, END_DT, LEARN_DT, NTS, FILTER, PCA, LAG,
         SCALING, LAG_CUT, SHIFT, DIR, THRESHOLD) = range(15)

        rows = self.db.exeData(
            self.CONST.QR_SELECT_DV_SETUP % (dvcd, id, seq))
        if not rows:
            raise IndexError(
                "no setup row found for dv_cd=%r, id=%r, seq=%r"
                % (dvcd, id, seq))
        row = rows[0]

        def first_of_month(yyyymm):
            # The row stores year+month only; pin the value to day 01.
            return datetime.datetime.strptime(
                str(yyyymm) + '01', '%Y%m%d').date()

        result = {
            'id_nm': row[ID_NM],
            'seq': row[SEQ],
            'nts_thres': row[NTS],
            't0': first_of_month(row[START_DT]),
            't1': first_of_month(row[END_DT]),
            't2': first_of_month(row[LEARN_DT]),
            'pca_thres': row[PCA],
            'intv': int(row[LAG]),
            'lag_cut': int(row[LAG_CUT]),
            'scaling': row[SCALING],
            'hp_filter': row[FILTER],
            'dv': row[DV],
            'dv_dir': row[DIR],
            'thres_cut': 0.2,  # fixed at .2
            'dv_thres': row[THRESHOLD],
            'shift': row[SHIFT],
        }
        self.params = result
        return result

    def getDv(self, dv):
        """Run ``QR_SELECT_DV`` for ``dv`` and return the extracted series list."""
        rows = self.db.exeData(self.CONST.QR_SELECT_DV % dv)
        return list(self.extract_from_list(rows))

    def getITemsFromDV(self, dv):
        """Placeholder: always returns an empty list."""
        return []

    def getItems(self, id, seq, dvcd):
        """Return the series of every item registered for ``dvcd``.

        One pivot query is built from four SELECTs joined by ``union all``
        (item codes, item names with unit, paths, then the data rows grouped
        by ``trd_dt``), with one ``MAX(iF(...))`` column per item, and the
        result is passed to ``extract_from_list``.

        :param id: unused; kept for interface compatibility.
        :param seq: unused; kept for interface compatibility.
        :param dvcd: dependent-variable code selecting the items.
        :return: list produced by ``extract_from_list``; an empty list when
            no item is registered (the pivot query would otherwise have no
            columns and be invalid SQL).
        """
        item_rows = self.db.exeData(self.CONST.QR_SELECT_ITEM % (dvcd))
        if not item_rows:
            return []

        column_head = "MAX(iF(a.item_cd = '"
        # (select header, per-item column tail, source table, query trailer)
        select_specs = (
            ("select '', ",
             "', a.item_cd, null)) 'I'",
             "iwbs_ind_var_mast", ""),
            ("select '', ",
             "', concat(a.item_nm, '_', a.unit), null)) ",
             "iwbs_ind_var_mast", ""),
            ("select 'TRD_DT', ",
             "', a.path, null)) ",
             "iwbs_ind_var_mast", ""),
            ("select concat(a.trd_dt,'01'), ",
             "', a.amount, null)) ",
             "iwbs_ind_var_data", " group by a.trd_dt"),
        )

        selects = []
        for header, column_tail, table, trailer in select_specs:
            columns = ', '.join(
                column_head + item[0] + column_tail for item in item_rows)
            selects.append(
                header + columns
                + "from " + table + " a, iwbs_indust_mast b "
                + "where b.dv_cd = '" + dvcd + "' and a.item_cd = b.item_cd"
                + trailer)

        rows = self.db.exeData(" union all ".join(selects))
        return list(self.extract_from_list(rows))

    def extract_from_list(self, data):
        """Convert a pivoted result table into a list of ``Series`` objects.

        Layout read from ``data``: row 0 holds codes, row 1 names, row 2
        units, and rows 3.. hold values; column 0 holds the dates (parsed as
        ``YYYYMMDD``) and every further column is one series.  Requires
        ``self.params`` (set by ``getSetup``).

        :param data: sequence of rows as returned by ``db.exeData``.
        :return: the series whose dates, after ``data_cleansing`` and
            ``set_freq``, are non-empty and cover ``params['t0']`` to
            ``params['t1']``.
        """
        id_row, name_row, unit_row = 0, 1, 2
        date_col, first_series_col, first_data_row = 0, 1, 3

        du = DateUtility()
        raw_dates = du.getCol_values(data, date_col, first_data_row,
                                     len(data))
        dates = [
            datetime.datetime.strptime(str(int(raw)), '%Y%m%d').date()
            for raw in raw_dates
        ]

        series_result = []
        for col in range(first_series_col, len(data[id_row])):
            name = data[name_row][col]
            code = self.utility.convert_code(data[id_row][col])
            unit = data[unit_row][col]

            series = Series(self.params)
            series.io_type = 'I'
            series.code = code
            series.name = name
            series.group = unit
            series.value = du.getCol_values(data, col, first_data_row,
                                            len(data))
            # NOTE(review): every series receives the same list object;
            # confirm data_cleansing does not mutate it in place.
            series.date = dates
            series.data_cleansing(self.params['t0'], self.params['t1'])
            series.set_freq()

            # Keep only series that span the whole t0..t1 window.
            if (len(series.date) > 0
                    and series.date[0] <= self.params['t0']
                    and series.date[-1] >= self.params['t1']):
                series_result.append(series)

        return series_result
Ejemplo n.º 35
0
    def start(self):
        """Run the analysis pipeline end to end and return its results.

        Stages, in order (the elapsed time of each one is reported through
        ``Utility.printKeyValue``):

        1. Fetch the independent variables (IV) and the dependent variable
           (DV) through ``self.qr``.
        2. Reduce every series to monthly data and interpolate it over the
           in-sample span ``t0``..``t1``; do the same on copies over the
           out-of-sample span ``t0``..``t2``.
        3. Convert the series lists to DataFrames and preprocess them
           (``get_adf_test_after_df``, ``get_hp_filter`` and, when
           ``params['scaling']`` is 1, ``scale_iv``).
        4. Compute NTS for every IV and keep those below
           ``params['nts_thres']``.
        5. Run ``PcaCalculator.run_cap`` on the kept IVs, compute NTS for
           the resulting factors and attach each factor's weight.
        6. Build the warning index for both spans.

        :return: dict with keys ``iv_raw``, ``iv_info_dict``, ``df_iv``,
            ``df_iv_digit``, ``factor_info_dict``, ``df_factor_yyyymm``,
            ``df_warning_idx``, ``df_warning_idx_out``, ``dv_thres`` and
            ``factor_weight``.
        """
        util = Utility(self.options.debug)
        util.setLogger(self.logger)
        iv_total = []

        # Fetch the independent variables (items) from the database.
        atime = datetime.datetime.now()
        items = self.qr.getItems(self.options.userId, self.params['seq'],
                                 self.params['dv'])
        iv_total.extend(items)

        # Fetch the dependent variable from the database.
        dv = self.qr.getDv(self.params['dv'])

        util.printKeyValue('    GetItems Time diff',
                           datetime.datetime.now() - atime)

        du = DateUtility()

        interpolated_time = datetime.datetime.now()
        # Monthly date lists between t0 and t1 (in sample).
        month_list_str, month_list_months = du.get_montly_span(
            self.params['t0'], self.params['t1'])

        # Monthly date lists between t0 and t2 (out of sample).
        month_list_str_out, month_list_months_out = du.get_montly_span(
            self.params['t0'], self.params['t2'])

        # Copy before the in-sample processing below touches the series.
        iv_total_out = copy.deepcopy(iv_total)

        iv_info_dict = {}

        iv_total_out_time = datetime.datetime.now()

        for iv in iv_total:
            # Keep only the latest observation within each month, then
            # interpolate over the in-sample months.
            iv.set_monthly_data()
            iv.set_interpolated_data(month_list_months, month_list_str)
            iv_info_dict[iv.code] = {'group': iv.group}

        util.printKeyValue('    interpolated Time diff',
                           datetime.datetime.now() - interpolated_time)

        for iv in iv_total_out:
            iv.set_monthly_data()
            iv.set_interpolated_data(month_list_months_out, month_list_str_out)

        util.printKeyValue('    iv_total_out Time diff',
                           datetime.datetime.now() - iv_total_out_time)

        dv[0].set_monthly_data()
        dv[0].set_interpolated_data(month_list_months, month_list_str)

        # NOTE(review): unlike iv_total_out, dv_out is copied AFTER dv[0]
        # was already reduced and interpolated over the in-sample span --
        # confirm this ordering is intended.
        dv_out = copy.deepcopy(dv)
        dv_out[0].set_monthly_data()
        dv_out[0].set_interpolated_data(month_list_months_out,
                                        month_list_str_out)

        # Latest-in-month selection and interpolation are complete here.

        df_iv_time = datetime.datetime.now()
        df_iv = read.convert_series_list_to_dataframe(iv_total)
        df_iv_out = read.convert_series_list_to_dataframe(iv_total_out)

        util.printKeyValue('    df_iv, df_iv_out Time diff',
                           datetime.datetime.now() - df_iv_time)

        # Preprocessing.
        pp = PreProcessing()

        df_time = datetime.datetime.now()
        # Differencing after the ADF test (per the original comment).
        df_iv, df_iv_out = pp.get_adf_test_after_df(df_iv, df_iv_out,
                                                    iv_info_dict)
        util.printKeyValue('    adf_test Time diff',
                           datetime.datetime.now() - df_time)

        filter_time = datetime.datetime.now()
        # HP filter, in sample.
        df_iv = pp.get_hp_filter(df_iv, self.params['hp_filter'])
        util.printKeyValue('    df_iv_filter Time diff',
                           datetime.datetime.now() - filter_time)

        df_iv_time = datetime.datetime.now()
        # HP filter, out of sample.
        df_iv_out = pp.get_hp_filter(df_iv_out, self.params['hp_filter'])
        util.printKeyValue('    df_iv_out_filter Time diff',
                           datetime.datetime.now() - df_iv_time)

        df_dv_time = datetime.datetime.now()
        # Dependent variable.
        df_dv = read.convert_series_list_to_dataframe(dv)
        df_dv_out = read.convert_series_list_to_dataframe(dv_out)

        # Drop the first row (the original comment attributes this to
        # differencing).
        df_dv = df_dv[1:].reset_index(drop=True)
        df_dv_out = df_dv_out[1:].reset_index(drop=True)

        if int(self.params['scaling']) == 1:
            df_iv, df_iv_out = pp.scale_iv(df_iv, df_iv_out)

        df_iv['DV'] = df_dv[df_dv.columns[2]]
        df_iv_out['DV'] = df_dv_out[df_dv_out.columns[2]]  # out of sample
        util.printKeyValue('    df_dv_out Time diff',
                           datetime.datetime.now() - df_dv_time)

        nts_time = datetime.datetime.now()
        # NTS computation; the call receives iv_info_dict, into which the
        # original comment says the NTS information is loaded.
        # (2016.03.10) lag_cut was added to restrict the crisis
        # identification window within the lead period.
        nts_module = NtsCaldulator()
        dv_crisis_digit_list, dv_thres = \
            nts_module.cal_nts_total(
                df_iv, iv_info_dict,
                self.params['intv'],
                self.params['thres_cut'],
                self.params['dv_thres'],
                self.params['lag_cut'],
                self.params['dv_dir']
            )
        util.printKeyValue('    cal_nts_total Time diff',
                           datetime.datetime.now() - nts_time)

        df_iv_digit_time = datetime.datetime.now()
        # Threshold and digit per IV according to NTS.
        df_iv_digit = nts_module.get_iv_sh_digit(df_iv, iv_info_dict,
                                                 self.params['dv_thres'],
                                                 self.params['dv_dir'])
        util.printKeyValue('    get_iv_sh_digit Time diff',
                           datetime.datetime.now() - df_iv_digit_time)

        srt_time = datetime.datetime.now()
        # .items() instead of the Python-2-only .iteritems(); sorted()
        # yields the same list either way.
        srted = sorted(iv_info_dict.items(), key=self.get_value)
        filtered = [s for s in srted if s[1]['nts'] < self.params['nts_thres']]
        util.printKeyValue('    sorted Time diff',
                           datetime.datetime.now() - srt_time)

        factor_time = datetime.datetime.now()
        code_list = [entry[0] for entry in filtered]

        pca_module = PcaCalculator()

        _, wt, fracs, df_factor, df_factor_out = \
            pca_module.run_cap(
                df_iv[code_list],
                df_iv_out[code_list],
                self.params['pca_thres']
            )

        factor_weight = {
            'col_list': df_iv[code_list].columns.tolist(),
            'weight': wt,
            'fracs': fracs,
        }

        # In-sample factor frame for output (df_factor_yyyymm).
        df_factor_series = df_factor.copy()
        df_factor_series['YYYYMM'] = df_iv['YYYYMM'].tolist()
        df_factor_series['DV'] = df_iv['DV'].tolist()

        # Out-of-sample factor frame.
        # NOTE(review): the labels below are taken from the in-sample
        # df_iv, not df_iv_out -- this looks like a copy/paste slip; confirm
        # against the row count of df_factor_out before changing it.
        df_factor_series_out = df_factor_out.copy()
        df_factor_series_out['YYYYMM'] = df_iv['YYYYMM'].tolist()
        df_factor_series_out['DV'] = df_iv['DV'].tolist()

        factor_info_dict = {col: {} for col in df_factor.columns}

        # NTS for the factors; (2016.03.10) lag_cut restriction as above.
        nts_module.cal_nts_total(df_factor_series, factor_info_dict,
                                 self.params['intv'], self.params['thres_cut'],
                                 self.params['dv_thres'],
                                 self.params['lag_cut'], self.params['dv_dir'])

        # The i-th factor column gets the i-th entry of fracs as its weight.
        for i, col in enumerate(df_factor.columns):
            factor_info_dict[col]['weight'] = factor_weight['fracs'][i]
        util.printKeyValue('    factor Time diff',
                           datetime.datetime.now() - factor_time)

        idx_time = datetime.datetime.now()
        # Warning (crisis) index.
        df_warning_idx = self.cal_warning_idx(factor_info_dict,
                                              df_factor_series)
        df_warning_idx_out = \
            self.cal_warning_idx(factor_info_dict, df_factor_series_out)

        result = {
            'iv_raw': iv_total,
            'iv_info_dict': iv_info_dict,
            'df_iv': df_iv,
            'df_iv_digit': df_iv_digit,
            'factor_info_dict': factor_info_dict,
            'df_factor_yyyymm': df_factor_series,
            'df_warning_idx': df_warning_idx,
            'df_warning_idx_out': df_warning_idx_out,
            'dv_thres': dv_thres,
            'factor_weight': factor_weight,
        }
        util.printKeyValue('    cal idx Time diff',
                           datetime.datetime.now() - idx_time)
        return result
Ejemplo n.º 36
0
Archivo: read.py Proyecto: 80xer/irewe
 def __init__(self, params):
     """Keep the run parameters and the bounds of the analysis window.

     :param params: mapping that must provide ``'t0'`` and ``'t1'``; the
         two values are mirrored as attributes and the mapping itself is
         stored as ``self.params``.
     """
     # Mirror the window bounds as attributes, t0 first.
     for bound in ('t0', 't1'):
         setattr(self, bound, params[bound])
     self.utility = Utility()
     self.params = params
Ejemplo n.º 37
0
    def start(self):
        """Run the analysis pipeline end to end and return its results.

        Stages, in order (the elapsed time of each one is reported through
        ``Utility.printKeyValue``):

        1. Fetch the independent variables (IV) and the dependent variable
           (DV) through ``self.qr``.
        2. Reduce every series to monthly data and interpolate it over the
           in-sample span ``t0``..``t1``; do the same on copies over the
           out-of-sample span ``t0``..``t2``.
        3. Convert the series lists to DataFrames and preprocess them
           (``get_adf_test_after_df``, ``get_hp_filter`` and, when
           ``params['scaling']`` is 1, ``scale_iv``).
        4. Compute NTS for every IV and keep those below
           ``params['nts_thres']``.
        5. Run ``PcaCalculator.run_cap`` on the kept IVs, compute NTS for
           the resulting factors and attach each factor's weight.
        6. Build the warning index for both spans.

        :return: dict with keys ``iv_raw``, ``iv_info_dict``, ``df_iv``,
            ``df_iv_digit``, ``factor_info_dict``, ``df_factor_yyyymm``,
            ``df_warning_idx``, ``df_warning_idx_out``, ``dv_thres`` and
            ``factor_weight``.
        """
        util = Utility(self.options.debug)
        util.setLogger(self.logger)
        iv_total = []

        # Fetch the independent variables (items) from the database.
        atime = datetime.datetime.now()
        items = self.qr.getItems(
            self.options.userId,
            self.params['seq'],
            self.params['dv']
        )
        iv_total.extend(items)

        # Fetch the dependent variable from the database.
        dv = self.qr.getDv(self.params['dv'])

        util.printKeyValue(
            '    GetItems Time diff',
            datetime.datetime.now() - atime)

        du = DateUtility()

        interpolated_time = datetime.datetime.now()
        # Monthly date lists between t0 and t1 (in sample).
        month_list_str, month_list_months = du.get_montly_span(
            self.params['t0'],
            self.params['t1'])

        # Monthly date lists between t0 and t2 (out of sample).
        month_list_str_out, month_list_months_out = du.get_montly_span(
            self.params['t0'],
            self.params['t2'])

        # Copy before the in-sample processing below touches the series.
        iv_total_out = copy.deepcopy(iv_total)

        iv_info_dict = {}

        iv_total_out_time = datetime.datetime.now()

        for iv in iv_total:
            # Keep only the latest observation within each month, then
            # interpolate over the in-sample months.
            iv.set_monthly_data()
            iv.set_interpolated_data(month_list_months, month_list_str)
            iv_info_dict[iv.code] = {'group': iv.group}

        util.printKeyValue(
            '    interpolated Time diff',
            datetime.datetime.now() - interpolated_time)

        for iv in iv_total_out:
            iv.set_monthly_data()
            iv.set_interpolated_data(month_list_months_out,
                                     month_list_str_out)

        util.printKeyValue(
            '    iv_total_out Time diff',
            datetime.datetime.now() - iv_total_out_time)

        dv[0].set_monthly_data()
        dv[0].set_interpolated_data(month_list_months, month_list_str)

        # NOTE(review): unlike iv_total_out, dv_out is copied AFTER dv[0]
        # was already reduced and interpolated over the in-sample span --
        # confirm this ordering is intended.
        dv_out = copy.deepcopy(dv)
        dv_out[0].set_monthly_data()
        dv_out[0].set_interpolated_data(month_list_months_out,
                                        month_list_str_out)

        # Latest-in-month selection and interpolation are complete here.

        df_iv_time = datetime.datetime.now()
        df_iv = read.convert_series_list_to_dataframe(iv_total)
        df_iv_out = read.convert_series_list_to_dataframe(
            iv_total_out)

        util.printKeyValue(
            '    df_iv, df_iv_out Time diff',
            datetime.datetime.now() - df_iv_time)

        # Preprocessing.
        pp = PreProcessing()

        df_time = datetime.datetime.now()
        # Differencing after the ADF test (per the original comment).
        df_iv, df_iv_out = pp.get_adf_test_after_df(df_iv,
                                                    df_iv_out,
                                                    iv_info_dict)
        util.printKeyValue(
            '    adf_test Time diff',
            datetime.datetime.now() - df_time)

        filter_time = datetime.datetime.now()
        # HP filter, in sample.
        df_iv = pp.get_hp_filter(df_iv, self.params['hp_filter'])
        util.printKeyValue(
            '    df_iv_filter Time diff',
            datetime.datetime.now() - filter_time)

        df_iv_time = datetime.datetime.now()
        # HP filter, out of sample.
        df_iv_out = pp.get_hp_filter(df_iv_out, self.params['hp_filter'])
        util.printKeyValue(
            '    df_iv_out_filter Time diff',
            datetime.datetime.now() - df_iv_time)

        df_dv_time = datetime.datetime.now()
        # Dependent variable.
        df_dv = read.convert_series_list_to_dataframe(dv)
        df_dv_out = read.convert_series_list_to_dataframe(dv_out)

        # Drop the first row (the original comment attributes this to
        # differencing).
        df_dv = df_dv[1:].reset_index(drop=True)
        df_dv_out = df_dv_out[1:].reset_index(drop=True)

        if int(self.params['scaling']) == 1:
            df_iv, df_iv_out = pp.scale_iv(df_iv, df_iv_out)

        df_iv['DV'] = df_dv[df_dv.columns[2]]
        df_iv_out['DV'] = df_dv_out[df_dv_out.columns[2]]    # out of sample
        util.printKeyValue(
            '    df_dv_out Time diff',
            datetime.datetime.now() - df_dv_time)

        nts_time = datetime.datetime.now()
        # NTS computation; the call receives iv_info_dict, into which the
        # original comment says the NTS information is loaded.
        # (2016.03.10) lag_cut was added to restrict the crisis
        # identification window within the lead period.
        nts_module = NtsCaldulator()
        dv_crisis_digit_list, dv_thres = \
            nts_module.cal_nts_total(
                df_iv, iv_info_dict,
                self.params['intv'],
                self.params['thres_cut'],
                self.params['dv_thres'],
                self.params['lag_cut'],
                self.params['dv_dir']
            )
        util.printKeyValue(
            '    cal_nts_total Time diff',
            datetime.datetime.now() - nts_time)

        df_iv_digit_time = datetime.datetime.now()
        # Threshold and digit per IV according to NTS.
        df_iv_digit = nts_module.get_iv_sh_digit(df_iv, iv_info_dict,
                                                 self.params['dv_thres'],
                                                 self.params['dv_dir'])
        util.printKeyValue(
            '    get_iv_sh_digit Time diff',
            datetime.datetime.now() - df_iv_digit_time)

        srt_time = datetime.datetime.now()
        # .items() instead of the Python-2-only .iteritems(); sorted()
        # yields the same list either way.
        srted = sorted(iv_info_dict.items(), key=self.get_value)
        filtered = [s for s in srted if s[1]['nts'] < self.params['nts_thres']]
        util.printKeyValue(
            '    sorted Time diff',
            datetime.datetime.now() - srt_time)

        factor_time = datetime.datetime.now()
        code_list = [entry[0] for entry in filtered]

        pca_module = PcaCalculator()

        _, wt, fracs, df_factor, df_factor_out = \
            pca_module.run_cap(
                df_iv[code_list],
                df_iv_out[code_list],
                self.params['pca_thres']
            )

        factor_weight = {
            'col_list': df_iv[code_list].columns.tolist(),
            'weight': wt,
            'fracs': fracs,
        }

        # In-sample factor frame for output (df_factor_yyyymm).
        df_factor_series = df_factor.copy()
        df_factor_series['YYYYMM'] = df_iv['YYYYMM'].tolist()
        df_factor_series['DV'] = df_iv['DV'].tolist()

        # Out-of-sample factor frame.
        # NOTE(review): the labels below are taken from the in-sample
        # df_iv, not df_iv_out -- this looks like a copy/paste slip; confirm
        # against the row count of df_factor_out before changing it.
        df_factor_series_out = df_factor_out.copy()
        df_factor_series_out['YYYYMM'] = df_iv['YYYYMM'].tolist()
        df_factor_series_out['DV'] = df_iv['DV'].tolist()

        factor_info_dict = {col: {} for col in df_factor.columns}

        # NTS for the factors; (2016.03.10) lag_cut restriction as above.
        nts_module.cal_nts_total(
            df_factor_series,
            factor_info_dict,
            self.params['intv'],
            self.params['thres_cut'],
            self.params['dv_thres'],
            self.params['lag_cut'],
            self.params['dv_dir']
        )

        # The i-th factor column gets the i-th entry of fracs as its weight.
        for i, col in enumerate(df_factor.columns):
            factor_info_dict[col]['weight'] = factor_weight['fracs'][i]
        util.printKeyValue(
            '    factor Time diff',
            datetime.datetime.now() - factor_time)

        idx_time = datetime.datetime.now()
        # Warning (crisis) index.
        df_warning_idx = self.cal_warning_idx(factor_info_dict,
                                              df_factor_series)
        df_warning_idx_out = \
            self.cal_warning_idx(factor_info_dict, df_factor_series_out)

        result = {
            'iv_raw': iv_total,
            'iv_info_dict': iv_info_dict,
            'df_iv': df_iv,
            'df_iv_digit': df_iv_digit,
            'factor_info_dict': factor_info_dict,
            'df_factor_yyyymm': df_factor_series,
            'df_warning_idx': df_warning_idx,
            'df_warning_idx_out': df_warning_idx_out,
            'dv_thres': dv_thres,
            'factor_weight': factor_weight,
        }
        util.printKeyValue(
            '    cal idx Time diff',
            datetime.datetime.now() - idx_time)
        return result