def repetitive_group_recognition(self, show=False, clean_attrs=True):
    """Merge repetition-recognition results into ``self.compos_dataframe``.

    Calls ``rep.recog_repetition_nontext`` and ``rep.recog_repetition_text``,
    left-merges both results into the current ``compos_dataframe``, renames
    ``alignment`` to ``alignment_in_group`` and writes a ``group`` column:
    ``'nt-<id>'`` when ``group_nontext`` is set, otherwise ``'t-<id>'`` when
    ``group_text`` is set, otherwise ``-1``. A label carried by exactly one
    row is reset to ``-1``.

    Args:
        show: Forwarded unchanged to both ``rep`` recognition functions.
        clean_attrs: When true, drop every column whose name contains
            ``'cluster'`` after the merges.

    Returns:
        None. The resulting frame replaces ``self.compos_dataframe``.
    """
    df_nontext = rep.recog_repetition_nontext(self, show)
    df_text = rep.recog_repetition_text(self, show)

    df = self.compos_dataframe.merge(df_nontext, how='left')
    # The non-text result may not carry an 'alignment' column; create it so
    # the fill below cannot raise KeyError.
    if 'alignment' not in df.columns:
        df['alignment'] = float('nan')
    # Fill alignment gaps from the text result *before* merging it, so the
    # shared 'alignment' column agrees on both sides of the second merge.
    df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
    df = df.merge(df_text, how='left')
    df = df.rename(columns={'alignment': 'alignment_in_group'})

    if clean_attrs:
        df = df.drop(list(df.filter(like='cluster')), axis=1)
    df = df.fillna(-1)  # from here on, -1 marks "no value"

    # Build the whole 'group' column in one pass. Rows without any group keep
    # their previous 'group' value when the column already exists, else -1.
    if 'group' in df.columns:
        previous = df['group'].tolist()
    else:
        previous = [-1] * len(df)
    labels = []
    for nontext_id, text_id, old in zip(
            df['group_nontext'], df['group_text'], previous):
        if nontext_id != -1:
            labels.append('nt-' + str(int(nontext_id)))
        elif text_id != -1:
            labels.append('t-' + str(int(text_id)))
        else:
            labels.append(old)
    df['group'] = labels

    # A group with a single member is not a repetition: reset it to -1.
    counts = df['group'].value_counts()
    singletons = counts[counts == 1].index
    df.loc[df['group'].isin(singletons), 'group'] = -1
    df['group'] = df['group'].fillna(-1)

    # NOTE(review): the original had a disabled call to
    # rep.rm_invalid_groups(df) at this point; it is still not applied.
    self.compos_dataframe = df
def repetitive_group_recognition(self, show=False, clean_attrs=True):
    """Merge repetition-recognition results into ``self.compos_dataframe``.

    Calls ``rep.recog_repetition_nontext`` and ``rep.recog_repetition_text``,
    left-merges both results into the current ``compos_dataframe``, renames
    ``alignment`` to ``alignment_in_group`` and writes a ``group`` column:
    ``'nt-<id>'`` when ``group_nontext`` is set, otherwise ``'t-<id>'`` when
    ``group_text`` is set, otherwise ``-1``. A label carried by exactly one
    row is reset to ``-1``.

    The original author's comment notes that this is called from a notebook.

    Args:
        show: Forwarded unchanged to both ``rep`` recognition functions.
        clean_attrs: When true, drop every column whose name contains
            ``'cluster'`` after the merges.

    Returns:
        None. The resulting frame replaces ``self.compos_dataframe``.
    """
    df_nontext = rep.recog_repetition_nontext(self, show)
    df_text = rep.recog_repetition_text(self, show)

    # Start from the original component frame, enriched with non-text results.
    df = self.compos_dataframe.merge(df_nontext, how='left')
    # The non-text result may not carry an 'alignment' column; create it so
    # the fill below cannot raise KeyError.
    if 'alignment' not in df.columns:
        df['alignment'] = np.nan
    # Fill alignment gaps from the text result *before* merging it, so the
    # shared 'alignment' column agrees on both sides of the second merge.
    df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
    df = df.merge(df_text, how='left')
    df = df.rename(columns={'alignment': 'alignment_in_group'})

    if clean_attrs:
        df = df.drop(list(df.filter(like='cluster')), axis=1)
    df = df.fillna(-1)  # from here on, -1 marks "no value"

    # Build the whole 'group' column in one pass. Rows without any group keep
    # their previous 'group' value when the column already exists, else -1.
    if 'group' in df.columns:
        previous = df['group'].tolist()
    else:
        previous = [-1] * len(df)
    labels = []
    for nontext_id, text_id, old in zip(
            df['group_nontext'], df['group_text'], previous):
        if nontext_id != -1:
            labels.append('nt-' + str(int(nontext_id)))
        elif text_id != -1:
            labels.append('t-' + str(int(text_id)))
        else:
            labels.append(old)
    df['group'] = labels

    # A group with a single member is not a repetition: reset it to -1.
    counts = df['group'].value_counts()
    singletons = counts[counts == 1].index
    df.loc[df['group'].isin(singletons), 'group'] = -1
    df['group'] = df['group'].fillna(-1)

    # NOTE(review): the original had a disabled call to
    # rep.rm_invalid_groups(df) at this point; it is still not applied.
    self.compos_dataframe = df
def repetitive_group_recognition(self, show=False, clean_attrs=True):
    """Merge repetition-recognition results into ``self.compos_dataframe``.

    Calls ``rep.recog_repetition_nontext`` and ``rep.recog_repetition_text``,
    left-merges both results into the current ``compos_dataframe`` and labels
    grouped rows in a ``group`` column: ``'nt-<id>'`` when ``group_nontext``
    is set, otherwise ``'t-<id>'`` when ``group_text`` is set.

    Unlike the label columns, ``group`` is written after the ``fillna(-1)``
    pass, so rows that belong to no group are left as NaN (or keep their
    previous value if the column already existed). The column is not created
    at all when no row is labelled.

    Args:
        show: Forwarded unchanged to both ``rep`` recognition functions.
        clean_attrs: When true, drop every column whose name contains
            ``'cluster'`` after the merges.

    Returns:
        None. The resulting frame replaces ``self.compos_dataframe``.
    """
    df_nontext = rep.recog_repetition_nontext(self, show)
    df_text = rep.recog_repetition_text(self, show)

    df = self.compos_dataframe.merge(df_nontext, how='left')
    # The non-text result may not carry an 'alignment' column; create it so
    # the fill below cannot raise KeyError.
    if 'alignment' not in df.columns:
        df['alignment'] = float('nan')
    # Fill alignment gaps from the text result *before* merging it, so the
    # shared 'alignment' column agrees on both sides of the second merge.
    df.loc[df['alignment'].isna(), 'alignment'] = df_text['alignment']
    df = df.merge(df_text, how='left')

    if clean_attrs:
        df = df.drop(list(df.filter(like='cluster')), axis=1)
    df = df.fillna(-1)  # from here on, -1 marks "no value"

    # Collect labels by row position in one pass instead of building a row
    # Series per iteration.
    labels = {}
    for position, (nontext_id, text_id) in enumerate(
            zip(df['group_nontext'], df['group_text'])):
        if nontext_id != -1:
            labels[position] = 'nt-' + str(int(nontext_id))
        elif text_id != -1:
            labels[position] = 't-' + str(int(text_id))

    # Only touch 'group' when at least one row is labelled, so the frame's
    # columns stay as they were otherwise.
    if labels:
        if 'group' in df.columns:
            previous = df['group'].tolist()
        else:
            previous = [float('nan')] * len(df)
        df['group'] = [
            labels.get(position, old) for position, old in enumerate(previous)
        ]

    self.compos_dataframe = df