Example #1
def read_connection(model_id):
    try:
        con_id = None  # defined up front so the error handler can reference it safely

        insert_record = {}
        insert_record["Timestamp"] = datetime.now()
        insert_record["S_Model_id"] = model_id
        insert_record["Result"] = []
        update_collection("nnresults", 'S_Model_id', model_id, insert_record)
        read_model = read_collectionbyid("nnmodels", model_id)
        lines_count = read_model['NumOfRecords']
        con_id = read_model['ConId']
        read_con = read_collectionbyid("nnconnections", con_id)
        ConType = read_con['ConType']

        if (ConType == "1"):
            dir_path = read_con['FileLocation']
            #filestream(con_id,dir_path,lines_count)
            print("Reading DataSet Started")
            filestream(con_id, dir_path, lines_count, model_id)

        else:
            broker_endpoints = read_con['BrokerEndPoint']
            topics_list = read_con['TopicName']
            print(broker_endpoints)
            #kafkastream(con_id,broker_endpoints,topics_list,lines_count)
            print("Reading DataSet Started")
            raw_input_data = kafkastream(con_id, broker_endpoints, topics_list,
                                         lines_count)
            print("Reading DataSet Ended")
            print("Pre-Processing Data Started")
            Model_input_Data, text_field_LE, scaler, unique_val_list = pre_processing(
                raw_input_data)
            print("Pre-Processing Data Ended")
            print(Model_input_Data.head())
            print("Training Model Started")
            result, normal, abnormal = trainmodel(Model_input_Data)
            print("Training Model Ended")
            print(result.head(100))
            print("Post-Processing Data Started")
            final_result = postprocessing(Model_input_Data, abnormal, scaler,
                                          text_field_LE, unique_val_list)
            print("Post-Processing Data Ended")
            # print(normal.head(100))
            # print(final_result)

    except Exception:
        exc_type, exc_obj, exc_tb = sys.exc_info()
        fname = os.path.split(exc_tb.tb_frame.f_code.co_filename)[1]
        print(exc_type, fname, exc_tb.tb_lineno)
        logging.error("Model creation failed: " + str(sys.exc_info()))
        if con_id is not None:
            update_collection(
                "nnconnections", "_id", ObjectId(con_id), {
                    "Status": "Failed",
                    "Error_Message": "Model creation failed. Error message <--> "
                                     + str(sys.exc_info())
                })
        raise
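
The DB helpers used throughout these examples (read_collectionbyid, update_collection, insert_collection) are not shown in the listing. A minimal sketch of what they might look like, assuming they are thin wrappers around a pymongo database handle; the connection URI and database name below are illustrative, not taken from the project:

from pymongo import MongoClient
from bson.objectid import ObjectId

db = MongoClient("mongodb://localhost:27017")["nndb"]  # hypothetical connection/database

def read_collectionbyid(collection, doc_id):
    # Fetch a single document by its ObjectId
    return db[collection].find_one({"_id": ObjectId(doc_id)})

def update_collection(collection, field, value, record):
    # Set the given fields on the first document matching field == value (upsert)
    db[collection].update_one({field: value}, {"$set": record}, upsert=True)

def insert_collection(collection, record):
    # Insert a new document
    db[collection].insert_one(record)
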
Example #2
def read_connection(model_id):
    read_model = read_collectionbyid("nnmodels", model_id)
    lines_count = read_model['NumOfRecords']
    TrainConId = read_model['TrainConId']

    if TrainConId == "":
        TrainConId = read_model['ConId']
    read_con = read_collectionbyid("nnconnections", TrainConId)
    ConType = read_con['ConType']
    if (ConType == "1"):
        dir_path = read_con['FileLocation']
        print("Reading DataSet Started")
        raw_input_data = filestream(TrainConId, dir_path, lines_count)

    else:
        broker_endpoints = read_con['BrokerEndPoint']
        topics_list = read_con['TopicName']
        #print(broker_endpoints)
        raw_input_data = kafkastream(TrainConId, broker_endpoints, topics_list,
                                     lines_count)

    print("Reading DataSet Ended")
    print("Pre-Processing Data Started")
    Model_input_Data, text_field_LE, scaler, unique_val_list = pre_processing(
        raw_input_data, model_id)
    print("Pre-Processing Data Ended")
    print("Training Model Started")
    result, normal, abnormal = trainmodel(Model_input_Data, model_id)
    print("Training Model Ended")
    print("Post-Processing Data Started")
    postprocessing(result, abnormal, scaler, text_field_LE, unique_val_list,
                   model_id)
    print("Post-Processing Data Ended")
Example #3
def read_connection(con_id):
    try:
        read_counters = read_collectionbyid("nnconnections", con_id)
        #print(read_counters)
        con_type = read_counters['ConType']

        if (con_type == "1"):
            dir_path = read_counters['FileLocation']
            #lines_count = read_counters['NoOfRecords']
            file_con_onerecord(con_id, dir_path, lines_count=1)

        else:
            broker_endpoints = read_counters['BrokerEndPoint']
            topics_list = read_counters['TopicName']
            #lines_count = read_counters['NoOfRecords']
            onerecord(con_id, broker_endpoints, topics_list, lines_count=1)
    except Exception:
        logging.error("Unable to read connection details from DB: " +
                      str(sys.exc_info()))
        update_collection(
            "nnconnections", "_id", ObjectId(con_id), {
                "Status": "Failed",
                "Error_Message": "Unable to read connection details from DB. "
                                 "Error message <--> " + str(sys.exc_info())
            })
Example #4
def stream_model(data_processed=pd.DataFrame(), model_id=''):
    Local_File_Path = os.path.dirname(os.path.realpath(__file__))
    MODEL_PATH = Local_File_Path + "/Model/model.h5"
    TB_LOG_DIR = Local_File_Path + "/TB"
    MODEL_SAVE_PATH = Local_File_Path + "/Model/model.json"
    OUT_PATH = "/node-parser/data/Results/"
    RANDOM_STATE = 77
    THRESHOLD = 3  # values more than THRESHOLD standard deviations from the mean are treated as anomalies
    logger = logging.getLogger(os.path.basename(__file__))
    #logger = __get_logger()
    db_record = read_collectionbyid('nnmodels', model_id)
    config_over_field_name = db_record['MainField']
    config_job_function = db_record['ModelType']
    config_job_sub_function = db_record['ModelMethod']
    X_test_index = data_processed.index
    test_columns = list(data_processed.columns)
    ix = test_columns.index(config_over_field_name)
    test = pd.DataFrame(data_processed.iloc[:, ix])
    # Instantiate the detector once so both the NN and SVM branches can use it
    ad_obj = AD.AnomalyDetection(logger)
    if config_job_sub_function == 'NN':
        X_test = test.values.reshape(test.shape[0], 1)
        # Load the saved neural-network model and compile it
        model = ad_obj.load_nn_model(MODEL_PATH, MODEL_SAVE_PATH)
        model = ad_obj.compile_nn_model(model)
        result = ad_obj.predict_nn_model(model, X_test)
        # ad_obj.plot_nn_result(THRESHOLD,error_df,test)
        normal, abnormal = ad_obj.export_nn_result(THRESHOLD, result, test,
                                                   OUT_PATH)
        normal.set_index(X_test_index, inplace=True)
        abnormal.set_index(X_test_index, inplace=True)

    if config_job_sub_function == 'SVM':
        model = ad_obj.load_svm_model(MODEL_PATH)
        pred, result = ad_obj.predict_svm_model(model, test)
        #ad_obj.plot_svm_result(X_test, pred)
        normal, abnormal = ad_obj.export_svm_result(test, pred, OUT_PATH)
        normal.set_index(X_test_index, inplace=True)
        abnormal.set_index(X_test_index, inplace=True)

    #print(normal)
    return normal, abnormal
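
load_nn_model and compile_nn_model belong to the project's AD.AnomalyDetection class and are not shown in this listing. If they follow the usual Keras pattern implied by the .json architecture file and .h5 weights file, they might look roughly like the sketch below; treat the bodies as an assumption, not the project's actual implementation:

from tensorflow.keras.models import model_from_json

def load_nn_model(weights_path, arch_path):
    # Rebuild the network from its JSON description, then restore the trained weights
    with open(arch_path) as f:
        model = model_from_json(f.read())
    model.load_weights(weights_path)
    return model

def compile_nn_model(model):
    # Typical autoencoder settings; the real class may use a different optimizer/loss
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model
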
Example #5
def postprocessing(data=pd.DataFrame(), abnormal=pd.DataFrame(),
                   scaler=MinMaxScaler(copy=True, feature_range=(0, 1)),
                   text_field_LE={}, unique_val_list={}, model_id=''):
    data_index = data.index
    db_record = read_collectionbyid('nnmodels', model_id)
    Scaler_deco_data = pd.DataFrame(scaler.inverse_transform(data),
                                    columns=data.columns)
    Scaler_deco_data.set_index(data_index, inplace=True)
    #print('shape')
    #print(Scaler_deco_data.dtypes)

    def ldecoder(data, feature='', le=LabelEncoder()):
        # Restore the original text column from its "<feature>_Encoded" counterpart
        new_col = str(feature) + "_Encoded"
        data[feature] = le.inverse_transform(data[new_col].astype('int'))
        data.drop(new_col, axis=1, inplace=True)
        return data

    for key, tx_f in text_field_LE.items():
        data = ldecoder(Scaler_deco_data, key, tx_f)
    # In the abnormal frame, mark non-null 'Actual' values as 'YES' and fill the rest with 'NO'
    abnormal.loc[abnormal['Actual'].notnull(), 'Actual'] = 'YES'
    abnormal['Actual'].fillna('NO', inplace=True)
    df_result = pd.concat([data, abnormal['Actual']], axis=1, copy=False)
    df_result.reset_index(level=0, inplace=True)
    df_result.sort_values(db_record['MainField'], inplace=True)
    result_json = df_result.to_json(orient='records')
    insert_record = {}
    insert_record["Timestamp"] = datetime.now()
    insert_record["Model_id"] = model_id
    insert_record["Result"] = result_json
    insert_record.update(unique_val_list)
    #print(insert_record)
    #print(model_id)
    #print(str(unique_val_list))
    insert_collection("nnresults", insert_record)
    #update_collection("nnresults","Model_id",model_id, unique_val_list)
    #print(e)
    #print(result_json)
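
ldecoder above reverses the LabelEncoder round trip that pre_processing sets up: fit_transform on the text column, then inverse_transform on the "<field>_Encoded" column. A standalone illustration (the column name is made up):

import pandas as pd
from sklearn.preprocessing import LabelEncoder

df = pd.DataFrame({"Host": ["web01", "db01", "web01"]})
le = LabelEncoder()
df["Host_Encoded"] = le.fit_transform(df["Host"].astype(str))  # -> [1, 0, 1]
df.drop("Host", axis=1, inplace=True)

# ...the model works on the numeric column; afterwards the labels are restored:
df["Host"] = le.inverse_transform(df["Host_Encoded"].astype("int"))
df.drop("Host_Encoded", axis=1, inplace=True)
print(df["Host"].tolist())  # ['web01', 'db01', 'web01']
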
Example #6
def trainmodel(data_processed=pd.DataFrame(), model_id=''):
    Local_File_Path = os.path.dirname(os.path.realpath(__file__))
    MODEL_PATH = Local_File_Path + "/Model/model.h5"
    MODEL_SAVE_PATH = Local_File_Path + "/Model/model.json"
    TB_LOG_DIR = Local_File_Path + "/TB/" + model_id + "/"
    if not os.path.exists(TB_LOG_DIR):
        os.makedirs(TB_LOG_DIR)
        os.chmod(TB_LOG_DIR, 0o755)
    OUT_PATH = "/node-parser/data/Results/"
    RANDOM_STATE = 77
    THRESHOLD = 3  # values more than THRESHOLD standard deviations from the mean are treated as anomalies
    logger = logging.getLogger(os.path.basename(__file__))
    #logger = __get_logger()
    db_record = read_collectionbyid('nnmodels', model_id)
    config_over_field_name = db_record['MainField']
    config_job_function = db_record['ModelType']
    config_job_sub_function = db_record['ModelMethod']

    if config_job_function == 'anomaly':
        # Split data into train/test
        test_columns = list(data_processed.columns)
        ix = test_columns.index(config_over_field_name)
        train, test = train_test_split(data_processed,
                                       test_size=0.2,
                                       random_state=RANDOM_STATE)
        X_test_pd = pd.DataFrame(test)
        #print('split')
        #print(X_test_pd)
        X_train = pd.DataFrame(train.iloc[:, ix])
        X_test = pd.DataFrame(test.iloc[:, ix])
        X_test_index = X_test.index

        # Build the Model
        ad_obj = AD.AnomalyDetection(logger)

        if config_job_sub_function == 'NN':
            X_train = X_train.values.reshape(X_train.shape[0], 1)
            X_test = X_test.values.reshape(X_test.shape[0], 1)
            encoding_dim = int(math.log(X_train.shape[0]))
            input_dim = X_train.shape[1]
            nn_model = ad_obj.build_nn_model(encoding_dim, input_dim)
            nn_model = ad_obj.compile_nn_model(nn_model)
            # Run Model
            nb_epoch = int(math.pow(X_train.shape[0], 3 / 5))
            batch_size = int(math.sqrt(X_train.shape[0]))
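            # Worked example of these sizing heuristics (illustrative numbers):
            # 10,000 training rows -> encoding_dim = int(ln(10000)) = 9,
            # nb_epoch = int(10000 ** 0.6) = 251, batch_size = int(sqrt(10000)) = 100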
            nn_model, history = ad_obj.run_nn_model(
                nn_model,
                X_train,
                X_test,
                nb_epoch,
                batch_size,
                MODEL_PATH,
                TB_LOG_DIR,
            )

            # Visualize Training History
            # ad_obj.depict_nn_training_hist(history)
            ad_obj.save_nn_model(nn_model, MODEL_SAVE_PATH)
            nn_history_file = OUT_PATH + "nn_history.json"
            ad_obj.export_nn_training_result(history, nn_history_file)
            result = ad_obj.predict_nn_model(nn_model, X_test)
            result.set_index(X_test_index, inplace=True)
            # ad_obj.plot_nn_result(THRESHOLD,error_df,test)
            normal, abnormal = ad_obj.export_nn_result(THRESHOLD, result, test,
                                                       OUT_PATH)
            normal.set_index(X_test_index, inplace=True)
            abnormal.set_index(X_test_index, inplace=True)

        if config_job_sub_function == 'SVM':
            svm_model = ad_obj.build_svm_model(X_train)
            ad_obj.save_svm_model(MODEL_PATH)
            pred, result = ad_obj.predict_svm_model(svm_model, X_test)
            result.set_index(X_test_index, inplace=True)
            #ad_obj.plot_svm_result(X_test,pred)
            normal, abnormal = ad_obj.export_svm_result(X_test, pred, OUT_PATH)
            normal.set_index(X_test_index, inplace=True)
            abnormal.set_index(X_test_index, inplace=True)

    return X_test_pd, normal, abnormal
Example #7
def pre_processing(raw_input_data=pd.DataFrame(), model_id=''):
    db_record = read_collectionbyid('nnmodels', model_id)
    if db_record['TrainConId'] == '':
        con_id = db_record['ConId']
    else:
        con_id = db_record['TrainConId']
    field_list = read_collectionbyfield('nnfieldslist', 'Con_id', con_id)
    field_names = field_list['Field_list'].split('|')
    field_types = field_list['Field_Type_List'].split('|')
    # Map each field name to its declared type
    field_type = dict(zip(field_names, field_types))
    Main_Field = db_record['MainField']
    Main_Field_Type = field_type[Main_Field]
    Time_Field = db_record['TimeSeriesField']
    # Optional filter on Main_Field; left empty here, so none of the filter branches below apply
    filter = ''
    filter_value = 200.02
    filter_value1 = 0
    IntervalSpan = db_record['IntervalSpan'].replace('~', '')
    if IntervalSpan == '': IntervalSpan = None
    # Additional fields selected for grouping
    Inf_Field = db_record['FieldsSelected'].split(',')
    fun = db_record['AggregationFun']
    text_field_list = []
    unique_val_list = {}
    #print(raw_input_data.head())
    for Field in Inf_Field:
        #unique_val_list[Field] = list(raw_input_data[Field].unique())
        if field_type[Field] == 'str':
            text_field_list.append(Field)

    def lencoder(data, feature):
        le = LabelEncoder()
        new_col = str(feature) + "_Encoded"
        data[new_col] = le.fit_transform(data[feature].astype(str))
        #print(le.inverse_transform(data[new_col]))
        data.drop(feature, axis=1, inplace=True)
        return data, le

    def minmaxscaler(data, time_field):
        scaler = MinMaxScaler(copy=False, feature_range=(0, 1))
        data.set_index(time_field, inplace=True)
        data_index = data.index
        #data.drop(time,axis=1,inplace=True)
        #print(data.columns)
        data = pd.DataFrame(scaler.fit_transform(data), columns=data.columns)
        data.set_index(data_index, inplace=True)
        return data, scaler

    raw_input_data = raw_input_data[list([Time_Field, Main_Field]) + Inf_Field]
    #raw_input_data = raw_input_data.filter()
    raw_input_data = raw_input_data.replace('', np.nan, inplace=False)
    raw_input_data = raw_input_data.dropna(inplace=False, axis=0)
    if Main_Field_Type == 'int' or Main_Field_Type == 'float':
        raw_input_data[Main_Field] = pd.to_numeric(raw_input_data[Main_Field],
                                                   errors='coerce')
    if filter == '=':
        raw_input_data = raw_input_data[raw_input_data[Main_Field] ==
                                        filter_value]
    elif filter == '!=':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field] != filter_value]
    elif filter == '>':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field] > filter_value]
    elif filter == '<':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field] < filter_value]
    elif filter == '>=':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field] >= filter_value]
    elif filter == '<=':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field] <= filter_value]
    elif filter == 'like':
        raw_input_data = raw_input_data[
            raw_input_data[Main_Field].str.contains(filter_value)]
    elif filter == 'btw':
        raw_input_data = raw_input_data[
            (raw_input_data[Main_Field] >= filter_value)
            & (raw_input_data[Main_Field] <= filter_value1)]

    #print(raw_input_data)

    try:
        raw_input_data[Time_Field] = raw_input_data[Time_Field].apply(
            lambda x: pen.parse(x))
    except Exception:
        raw_input_data[Time_Field] = raw_input_data[Time_Field].apply(
            lambda x: datetime.fromtimestamp(
                mktime(time.strptime(x, '%d/%b/%Y:%H:%M:%S %z'))))
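    # The fallback format matches Apache-style timestamps,
    # e.g. "10/Oct/2000:13:55:36 -0700" for '%d/%b/%Y:%H:%M:%S %z'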
    #raw_input_data[Time_Field] = pd.to_datetime(raw_input_data[Time_Field],format='%d/%b/%Y:%H:%M:%S %z')
    #print(raw_input_data.columns)
    #raw_input_data[Time_Field]=raw_input_data[Time_Field].apply(lambda x: datetime.datetime.strftime(x))
    #raw_input_data['time1'] = raw_input_data[Time_Field].apply(lambda x: dateparser.parse(x))
    if (fun == 'sum'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.sum, axis=0).reset_index()
    if (fun == 'count'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].agg('count').reset_index()
    if (fun == 'average'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.average, axis=0).reset_index()
    if (fun == 'mean'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.mean, axis=0).reset_index()
    if (fun == 'median'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.median, axis=0).reset_index()
    if (fun == 'product'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.prod, axis=0).reset_index()
    if (fun == 'maximum'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.max, axis=0).reset_index()
    if (fun == 'minimum'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.min, axis=0).reset_index()
    if (fun == 'cumsum'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].cumsum().reset_index()
    if (fun == 'cumprod'):
        Grouped_input_data = raw_input_data.sort_values(
            Time_Field, ascending=True).groupby(
                list([pd.Grouper(key=Time_Field, freq=IntervalSpan)]) +
                Inf_Field)[Main_Field].apply(np.cumprod, axis=0).reset_index()

    #raw_input_data.replace('', np.nan, inplace=True)
    #print(Grouped_input_data)
    #raw_input_data.fillna(method='ffill')
    text_field_LE = {}
    Encoded_input_data = Grouped_input_data
    for tx_f in text_field_list:
        Encoded_input_data, text_field_LE[tx_f] = lencoder(
            Encoded_input_data, tx_f)
    Model_input_Data, scaler = minmaxscaler(Encoded_input_data, Time_Field)
    #print(Model_input_Data)
    #print(scaler)
    #print(datetime)
    return Model_input_Data, text_field_LE, scaler, unique_val_list
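
minmaxscaler keeps the fitted scaler so that postprocessing (Example #5) can call scaler.inverse_transform to recover the original units. A standalone illustration of that round trip (the column and values are made up):

import pandas as pd
from sklearn.preprocessing import MinMaxScaler

df = pd.DataFrame({"Bytes": [100.0, 500.0, 900.0]})
scaler = MinMaxScaler(feature_range=(0, 1))
scaled = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
print(scaled["Bytes"].tolist())    # [0.0, 0.5, 1.0]

restored = pd.DataFrame(scaler.inverse_transform(scaled), columns=df.columns)
print(restored["Bytes"].tolist())  # [100.0, 500.0, 900.0]
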
Example #8
def file_stream(con_id, dir_path, lines_count, model_id):
    raw_input_data = pd.DataFrame()
    count = 0
    i = 0
    error_count = 0
    f_count = 0
    start = 0
    end = lines_count
    rem_rows = 0
    os.chdir(dir_path)
    files = sorted(os.listdir(dir_path), key=os.path.getmtime)
    oldest = files[0]
    newest = files[-1]
    # print("Oldest:",oldest)
    # print("Newest:", newest)
    print("List of JSON files Found:", files, ".........")
    df_l = []
    dic_l = []
    db_record = read_collectionbyid('nnmodels', model_id)
    TimeSeriesField = db_record['TimeSeriesField']
    IntervalSpan = db_record['IntervalSpan'].replace('~', '')
    if IntervalSpan == '': IntervalSpan = None

    span_timestamp = None
    counter = 0
    try:
        num_files = len([
            f for f in os.listdir(dir_path)
            if os.path.isfile(os.path.join(dir_path, f))
        ])
        #print("num_files",num_files)

        while f_count < num_files:
            with open(files[f_count]) as myfile:
                try:
                    print("Reading file:", files[f_count])
                    chunk = list(islice(myfile, 0, None))
                    #print(len(chunk))
                    #print("chunk length:",len(chunk))
                    #print("old f_count,start,end:",f_count,start,end)
                    f_count = f_count + 1

                    for record in chunk:
                        try:
                            s = record
                            result = json.loads(s)  # try to parse...
                            if span_timestamp is None:
                                counter = 0
                            #print(record)
                            value, typ = convert_table(record, counter)
                            #print(value)
                            span_timestamp, df_l, dic_l = data_grouper(
                                value, typ, span_timestamp, model_id, df_l,
                                dic_l, TimeSeriesField, IntervalSpan)
                            counter += 1
                            count += 1
                            print("\r Records Read... " + str(count), end="")
                            # print("Original", raw_input_data)
                            # print(raw_input_data.head())
                            # print(raw_input_data.shape)
                            # break  # parsing worked -> exit loop

                        except Exception:
                            # Count and log records that fail to parse, then re-raise
                            error_count += 1
                            logging.error("Error while parsing json: " +
                                          str(sys.exc_info()))
                            raise

                except Exception:
                    logging.info("Processed all records..")
                    raise

        if len(dic_l) > 0 and len(df_l) > 0:
            raw_input_data_dic = json_normalize(dic_l, sep='~')
            raw_input_data_df = pd.concat(df_l, axis=0)
            # DataFrame.append was removed in pandas 2.x; concatenate instead
            raw_input_data = pd.concat([raw_input_data_dic, raw_input_data_df],
                                       ignore_index=False)
        elif len(df_l) > 0:
            raw_input_data = pd.concat(df_l, axis=0)
        elif len(dic_l) > 0:
            raw_input_data = json_normalize(dic_l, sep='~')
        #print(raw_input_data)

        return raw_input_data

    except Exception:
        logging.error("Unable to read input files: " + str(sys.exc_info()))
        raise
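
At its core, file_stream reads each input file as line-delimited JSON, parses one object per line, and normalizes the collected records into a DataFrame. A minimal version of that pattern without the grouping and error handling (the file name is illustrative):

import json
from itertools import islice

from pandas import json_normalize

records = []
with open("events.json") as fh:           # hypothetical NDJSON input
    for line in islice(fh, 0, None):      # islice(fh, 0, None) iterates every line
        records.append(json.loads(line))  # one JSON object per line
raw_input_data = json_normalize(records, sep='~')
print(raw_input_data.shape)
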