Example No. 1
def process_02(lc):
    # lc is expected to be a (time, magnitude, error) tuple of arrays.
    if lc[0].shape[0] < 20:
        # Too few observations: return a NaN-filled feature vector instead.
        return np.full(63, np.nan, dtype=np.float32)
    fs = feets.FeatureSpace(data=['time', 'magnitude', 'error'])
    try:
        _, values = fs.extract(*lc)
    except Exception:
        # Extraction failed: fall back to the NaN-filled feature vector.
        return np.full(63, np.nan, dtype=np.float32)
    return values.astype(np.float32)
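A brief usage sketch for the helper above, as it might be called on a single light curve. The synthetic arrays and the 200-point sine curve are illustrative assumptions, not part of the original example:

import numpy as np
import feets

time = np.linspace(0.0, 100.0, 200)
magnitude = 15.0 + 0.1 * np.sin(2 * np.pi * time / 3.7)   # toy periodic signal
error = np.full_like(time, 0.01)

vector = process_02((time, magnitude, error))
print(vector.shape, vector.dtype)   # feature vector as float32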
Example No. 2
    def setup_method(self):
        # Collect each _FeetsTest subclass that defines both a local feature
        # extractor and its feets counterpart, then build paired extractors.
        features = []
        feets_features = []
        for cls in _FeetsTest.__subclasses__():
            if cls.feets_feature is None or not hasattr(cls, "feature"):
                continue
            features.append(cls.feature)
            feets_features.append(cls.feets_feature)
        self.feature = lc.Extractor(*features)
        self.feets_extractor = feets.FeatureSpace(
            only=feets_features, data=["time", "magnitude", "error"])
Example No. 3
def feets_parallel(i, times, mags, errs, koi_n, save_f):
    resumen = pd.read_csv(save_f)
    if koi_n in resumen["KOI Name"].values:
        print("Skipping curve %d: %s " % (i, koi_n))
    else:
        start_time = time.time()
        print("Working on curve %d: %s... " % (i, koi_n), end='')

        fs = feets.FeatureSpace(data=['time', 'magnitude', 'error'])
        features, values = fs.extract(time=times, magnitude=mags, error=errs)

        res = [koi_n] + list(values.T)
        safe_write(save_f, res)  # read and save
        print("Done! in %f seconds" % (time.time() - start_time))
Example No. 4
    def setup(self):
        # Placeholder guard: everything below is unreachable until vs_catalog
        # and version are added.
        raise Exception("Add vs_catalog and version")

        print("min_observation:", self.min_observation)
        print("chunk_size:", self.chunk_size)
        print("write_limit:", self.write_limit)
        print("mp_cores:", self.mp_cores)
        print("mp_split:", self.mp_split)
        self.fs = feets.FeatureSpace(data=["magnitude", "time", "error"],
                                     exclude=[
                                         "SlottedA_length", "StetsonK_AC",
                                         "StructureFunction_index_21",
                                         "StructureFunction_index_31",
                                         "StructureFunction_index_32"
                                     ])
Example No. 5
def get_feet(data, fea):
    # Extract magnitude-only feets features for each column named in `fea`
    # and concatenate the results into a single one-row DataFrame.
    fs = feets.FeatureSpace(only=[
        'Amplitude', 'Gskew', 'PairSlopeTrend', 'Q31', 'Rcs', 'SmallKurtosis'
    ])
    t = []
    for i in fea:
        names, values = fs.extract(magnitude=data[i])
        a = pd.DataFrame(dict(zip(names, values)), index=[0])
        a.columns = [i + '_' + j for j in a.columns]
        t.append(a)
    rst = pd.concat(t, axis=1)
    rst.reset_index(drop=True, inplace=True)
    return rst
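A minimal usage sketch for get_feet, assuming data is a pandas DataFrame whose columns each hold a magnitude series; the column names and random values below are illustrative only:

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
data = pd.DataFrame({
    'lc_a': rng.normal(15.0, 0.2, 300),
    'lc_b': rng.normal(14.5, 0.3, 300),
})

feats = get_feet(data, ['lc_a', 'lc_b'])
print(feats.shape)   # one row; feature columns prefixed with the source column name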
Example No. 6
def get_feats_blank(field, year, month, day, min_num, max_num):
    path = "/fred/oz100/NOAO_archive/archive_NOAO_data/data_outputs/" + year + "/" + month + "/" + field + "/g_band/single/lightcurves/files/"
    #print(ath)

    lc_with_only_zeros = []
    used_lcs = []
    filenames = []

    Autocor_length = []
    Beyond1Std = []
    CAR_sigma = []
    CAR_mean = []
    CAR_tau = []
    Con = []
    Eta_e = []
    LinearTrend = []
    MaxSlope = []
    Mean = []
    Meanvariance = []
    MedianAbsDev = []
    MedianBRP = []
    PairSlopeTrend = []
    PercentAmplitude = []
    Q31 = []
    Rcs = []
    Skew = []
    SlottedA_length = []
    SmallKurtosis = []
    Std = []
    StetsonK_AC = []
    Amplitudes = []
    VariabilityIndex = []
    pmra = []
    pmde = []
    gaia_G_RP = []
    gaia_BP_G = []
    detection_fraction = []
    hl_ratio = []
    test_filename = os.listdir(path)
    #print(test_filename[0:1])
    amp1_val = []
    amp_2_1_ratio = []
    amp_3_1_ratio = []
    phase_2_1_ratio = []
    phase_3_1_ratio = []

    #print(len(test_filename))

    for filename in test_filename[min_num:max_num]:
        if filename.endswith(day):
            try:
                mjd, mag, emag, uplim = np.loadtxt(path + filename,
                                                   unpack=True,
                                                   skiprows=1)
            except Exception:
                # Empty or unreadable file: skip it instead of reusing stale arrays.
                print('FILE EMPTY')
                continue
            sum_mag = np.sum(mag)
            print(sum_mag)
            if sum_mag == 0:
                lc_with_only_zeros.append(filename)
            else:
                used_lcs.append(filename)

            # Remove non-detections and poor-quality points.
            clean_mjd = []
            clean_mag = []
            clean_emag = []
            for l, m, n in zip(mjd, mag, emag):
                if m != 0 and m < 25 and n < 0.8:
                    clean_mjd.append(l)
                    clean_mag.append(m)
                    clean_emag.append(n)

        #print(clean_mjd, clean_mag, clean_emag)
            print(clean_mjd)
            if len(clean_mjd) > 3:
                print('in cleaned loop')
                lc = np.array([clean_mag, clean_mjd, clean_emag])

                fs = feets.FeatureSpace(only=[
                    'Autocor_length', 'Beyond1Std', 'CAR_sigma', 'CAR_mean',
                    'CAR_tau', 'Con', 'LinearTrend', 'MaxSlope', 'Mean',
                    'Meanvariance', 'MedianAbsDev', 'MedianBRP',
                    'PairSlopeTrend', 'PercentAmplitude', 'Q31', 'Rcs', 'Skew',
                    'SlottedA_length', 'SmallKurtosis', 'Std', 'StetsonK_AC'
                ])
                features, values = fs.extract(*lc)
                results = dict(zip(features, values))
                #except:
                #	print filename
                print('after feets')
                filenames.append(filename)
                Autocor_length.append(results['Autocor_length'])
                Beyond1Std.append(results['Beyond1Std'])
                CAR_sigma.append(results['CAR_sigma'])
                CAR_mean.append(results['CAR_mean'])
                CAR_tau.append(results['CAR_tau'])
                Con.append(results['Con'])
                #Eta_e.append(results['Eta_e'])
                LinearTrend.append(results['LinearTrend'])
                MaxSlope.append(results['MaxSlope'])
                Mean.append(results['Mean'])
                Meanvariance.append(results['Meanvariance'])
                MedianAbsDev.append(results['MedianAbsDev'])
                MedianBRP.append(results['MedianBRP'])
                PairSlopeTrend.append(results['PairSlopeTrend'])
                PercentAmplitude.append(results['PercentAmplitude'])
                Q31.append(results['Q31'])
                Rcs.append(results['Rcs'])
                Skew.append(results['Skew'])
                SlottedA_length.append(results['SlottedA_length'])
                SmallKurtosis.append(results['SmallKurtosis'])
                Std.append(results['Std'])
                StetsonK_AC.append(results['StetsonK_AC'])
                # Amplitude: half the difference between the medians of the
                # brightest and faintest 5% of magnitudes.
                N = len(clean_mag)
                sorted_mag = np.sort(clean_mag)
                amp = (np.median(sorted_mag[-int(math.ceil(0.05 * N)):]) -
                       np.median(sorted_mag[0:int(math.ceil(0.05 * N))])) / 2
                Amplitudes.append(amp)
                # Von Neumann-style variability index from successive differences.
                clean_mag_array = np.asarray(clean_mag)
                sigma2 = np.var(clean_mag)
                VarIndex = 1 / ((N - 1) * sigma2) * np.sum(
                    np.power(clean_mag_array[1:] - clean_mag_array[:-1], 2))
                VariabilityIndex.append(VarIndex)
                # Ratio of detections to upper limits (non-detections),
                # recorded once per light curve.
                non_detects = [lim for lim in uplim if lim != 0]
                if len(non_detects) == 0:
                    detection_fraction.append(1)
                else:
                    detection_fraction.append(len(clean_mag) / len(non_detects))
                # Fourier amplitude and phase ratios from the real FFT of the
                # cleaned magnitudes.
                fft = np.fft.rfft(clean_mag)
                amps = np.sqrt(fft.real**2 + fft.imag**2)
                amp1 = amps[0]
                amp1_val.append(amp1)
                amp2 = amps[1]
                amp3 = amps[2]
                amp_2_1 = amp2 / amp1
                amp_3_1 = amp3 / amp1
                amp_2_1_ratio.append(amp_2_1)
                amp_3_1_ratio.append(amp_3_1)
                phases = np.arctan2(fft.imag, fft.real)
                phase1 = phases[0]
                phase2 = phases[1]
                phase3 = phases[2]
                phase_2_1 = phase2 / phase1
                phase_3_1 = phase3 / phase1
                phase_2_1_ratio.append(phase_2_1)
                phase_3_1_ratio.append(phase_3_1)

            else:
                print('Not enough data points')

    #print 'Not used'
    #print len(lc_with_only_zeros)
    #print 'Used'
    #print len(used_lcs)
    feature_table = Table()
    feature_table['LC_name'] = filenames
    feature_table['Autocor_length'] = Autocor_length
    feature_table['Beyond1Std'] = Beyond1Std
    feature_table['CAR_sigma'] = CAR_sigma
    feature_table['CAR_mean'] = CAR_mean
    feature_table['CAR_tau'] = CAR_tau
    feature_table['Con'] = Con
    feature_table['LinearTrend'] = LinearTrend
    feature_table['MaxSlope'] = MaxSlope
    feature_table['Mean'] = Mean
    feature_table['Meanvariance'] = Meanvariance
    feature_table['MedianAbsDev'] = MedianAbsDev
    feature_table['MedianBRP'] = MedianBRP
    feature_table['PairSlopeTrend'] = PairSlopeTrend
    feature_table['PercentAmplitude'] = PercentAmplitude
    feature_table['Q31'] = Q31
    feature_table['Rcs'] = Rcs
    feature_table['Skew'] = Skew
    feature_table['SlottedA_length'] = SlottedA_length
    feature_table['SmallKurtosis'] = SmallKurtosis
    feature_table['Std'] = Std
    feature_table['StetsonK_AC'] = StetsonK_AC
    feature_table['Amplitudes'] = Amplitudes
    feature_table['VariabilityIndex'] = VariabilityIndex
    #feature_table['DetectionFraction'] = detection_fraction
    feature_table['amp1'] = amp1_val
    feature_table['amp_2_1_ratio'] = amp_2_1_ratio
    feature_table['amp_3_1_ratio'] = amp_3_1_ratio
    feature_table['phase_2_1_ratio'] = phase_2_1_ratio
    feature_table['phase_3_1_ratio'] = phase_3_1_ratio
    output = '/home/swebb/oz100/LC_CLUSTERS/feature_lists/' + year + '_' + month + '_' + field + '_' + day + '/' + year + '_' + month + '_' + field + '_' + day + '_feat_' + str(
        max_num) + '.csv'
    print(output)
    df = feature_table.to_pandas()
    df.to_csv(output)
    #feature_table.write(output, format = 'ascii', overwrite = True)
    return feature_table
Example No. 7
    else:
        start_time = time.time()
        print("Working on curve %d: %s... " % (i, koi_n), end='')
        sys.stdout.flush()

        fs = feets.FeatureSpace(data=['time', 'magnitude'])
        features, values = fs.extract(time=times, magnitude=mags)

        res = [koi_n] + list(values.T)
        safe_write(save_f, res)  # read and save
        print("Done! in %f seconds" % (time.time() - start_time))


name_saved_file = "Feets_Features/ResumenFeets_sinError_seq.csv"
if not os.path.isfile(name_saved_file):
    fs = feets.FeatureSpace(data=['time', 'magnitude'])
    resumen = pd.DataFrame(columns=['KOI Name'] + list(fs.features_as_array_))
    resumen.to_csv(name_saved_file, index=False)

start_i = pd.read_csv(name_saved_file).shape[0]  # read from the saved file
if start_i % cores != 0:
    start_i = int(start_i / cores) * cores
if start_i == N:
    sys.stdout.write("Already done!")
    assert False
print("Starting execution at ", start_i)

for i in range(N):
    feets_parallel(
        i,
        coupled_time[i],
Example No. 8
    #class name based labels
    label_features = ['class_' + str(cl) for cl in all_classes]

    return target_map, label_features, all_classes, all_class_weights


# Q31
try:
    import sys
    ft = [sys.argv[1]]
except IndexError:
    print("using default features")
    ft = ['Beyond1Std']
#    ft = ['Eta_e', 'Amplitude', 'Autocor_length', 'Beyond1Std']
fs = feets.FeatureSpace(only=ft)
print(ft)


def lcperiod(df_main):
    df_main = df_main.sort_values('mjd')
    try:
        frequency, power = LombScargle(
            df_main['mjd'], df_main['flux'],
            dy=df_main['flux_err']).autopower(nyquist_factor=1)
        period = 1 / frequency[np.argmax(power)]
        power = power.mean()
    except ValueError:
        period = 0
        power = 0
Example No. 9
    'Freq1_harmonics_amplitude_3', 'Freq1_harmonics_rel_phase_1',
    'Freq1_harmonics_rel_phase_2', 'Freq1_harmonics_rel_phase_3',
    'Freq2_harmonics_amplitude_0', 'Freq2_harmonics_amplitude_1',
    'Freq2_harmonics_amplitude_2', 'Freq2_harmonics_amplitude_3',
    'Freq2_harmonics_rel_phase_1', 'Freq2_harmonics_rel_phase_2',
    'Freq2_harmonics_rel_phase_3', 'Freq3_harmonics_amplitude_0',
    'Freq3_harmonics_amplitude_1', 'Freq3_harmonics_amplitude_2',
    'Freq3_harmonics_amplitude_3', 'Freq3_harmonics_rel_phase_1',
    'Freq3_harmonics_rel_phase_2', 'Freq3_harmonics_rel_phase_3', 'Gskew',
    'LinearTrend', 'MaxSlope', 'Mean', 'Meanvariance', 'MedianAbsDev',
    'MedianBRP', 'PairSlopeTrend', 'PercentAmplitude',
    'PercentDifferenceFluxPercentile', 'PeriodLS', 'Period_fit', 'Psi_CS',
    'Psi_eta', 'Q31', 'Rcs', 'Skew', 'SmallKurtosis', 'Std', 'StetsonK'
]

fs = feets.FeatureSpace(data=["magnitude", "time", "error"], only=only_all)


def extract(sid, obs, old_feats):
    time = obs.pwp_stack_src_hjd.values
    magnitude = obs.pwp_stack_src_mag3.values
    error = obs.pwp_stack_src_mag_err3.values

    sort = np.argsort(time)
    time, magnitude, error = time[sort], magnitude[sort], error[sort]
    time, magnitude, error = preprocess.remove_noise(time,
                                                     magnitude,
                                                     error,
                                                     std_limit=3)

    new_feats = dict(
Example No. 10
    descr[2] = (descr[2][0], '|S13')
    descr = [(str(n), t) for n, t in descr]
    dt = np.dtype(descr)

    return sources.astype(dt)


fs = feets.FeatureSpace(
    data=["magnitude", "time", "error"],
    only=[
        "PeriodLS", "Period_fit",
        "Psi_CS", "Psi_eta",
        "Freq1_harmonics_amplitude_0", "Freq1_harmonics_amplitude_1",
        "Freq1_harmonics_amplitude_2", "Freq1_harmonics_amplitude_3",
        "Freq2_harmonics_amplitude_0", "Freq2_harmonics_amplitude_1",
        "Freq2_harmonics_amplitude_2", "Freq2_harmonics_amplitude_3",
        "Freq3_harmonics_amplitude_0", "Freq3_harmonics_amplitude_1",
        "Freq3_harmonics_amplitude_2", "Freq3_harmonics_amplitude_3",
        "Freq1_harmonics_rel_phase_0", "Freq1_harmonics_rel_phase_1",
        "Freq1_harmonics_rel_phase_2", "Freq1_harmonics_rel_phase_3",
        "Freq2_harmonics_rel_phase_0", "Freq2_harmonics_rel_phase_1",
        "Freq2_harmonics_rel_phase_2", "Freq2_harmonics_rel_phase_3",
        "Freq3_harmonics_rel_phase_0", "Freq3_harmonics_rel_phase_1",
        "Freq3_harmonics_rel_phase_2", "Freq3_harmonics_rel_phase_3"])



def main():
    with db.session_scope() as ses:
        query = ses.query(LightCurves).filter(LightCurves.tile.has(name="b278"))
Example No. 11
from tsfresh.feature_extraction import extract_features
from tsfresh.feature_extraction import feature_calculators
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

from numba import jit
from filby import *
from tsfresh_extra import *

import math
import time

import feets
import feets.preprocess

np.random.seed(35)

fs = feets.FeatureSpace(data=['magnitude', 'time', 'error'],
                        only=['StetsonK', 'SlottedA_length', 'StetsonK_AC'])

# Register the custom feature calculators with tsfresh by injecting them
# into its calculator registry.
feature_calculators.__dict__[
    "FluxPercentileRatioMid80"] = FluxPercentileRatioMid80
feature_calculators.__dict__[
    "FluxPercentileRatioMid20"] = FluxPercentileRatioMid20
feature_calculators.__dict__[
    "FluxPercentileRatioMid35"] = FluxPercentileRatioMid35
feature_calculators.__dict__[
    "FluxPercentileRatioMid50"] = FluxPercentileRatioMid50
feature_calculators.__dict__[
    "FluxPercentileRatioMid65"] = FluxPercentileRatioMid65

feature_calculators.__dict__["SmallKurtosis"] = SmallKurtosis
Example No. 12
    def setup_method(self):
        self.feets_extractor = feets.FeatureSpace(
            only=[self.feets_feature], data=["time", "magnitude", "error"])
Example No. 13
    'Freq3_harmonics_rel_phase_3', 'Gskew', 'LinearTrend', 'MaxSlope', 'Mean',
    'Meanvariance', 'MedianAbsDev', 'MedianBRP', 'PairSlopeTrend',
    'PercentAmplitude', 'PercentDifferenceFluxPercentile', 'PeriodLS',
    'Period_fit', 'Psi_CS', 'Psi_eta', 'Q31', 'Rcs', 'Skew', 'SmallKurtosis',
    'Std', 'StetsonK'
]

COLUMNS_NO_FEATURES = ['id', 'cnt', 'ra_k', 'dec_k', 'vs_type', 'vs_catalog']

COLUMNS_TO_PRESERVE = COLUMNS_NO_FEATURES + [
    'c89_jk_color', 'c89_hk_color', 'c89_jh_color', 'n09_jk_color',
    'n09_hk_color', 'n09_jh_color', 'c89_m2', 'c89_m4', 'c89_c3', 'n09_m2',
    'n09_m4', 'n09_c3', 'ppmb', "PeriodLS"
]

FEATURE_SPACE = feets.FeatureSpace(data=["magnitude", "time", "error"],
                                   only=FEATURES_TO_CALULATE)

# =============================================================================
# FUNCTIONS
# =============================================================================


def sigma_clip(obs):
    time = obs.pwp_stack_src_hjd.values
    magnitude = obs.pwp_stack_src_mag3.values
    error = obs.pwp_stack_src_mag_err3.values

    sort = np.argsort(time)
    time, magnitude, error = time[sort], magnitude[sort], error[sort]

    time, magnitude, error = preprocess.remove_noise(time,